Example #1
0
def allTLSeqCounter():
    """Record per-run sequence counts and barcode values.

    For every run index from 1 through ``numofruns`` the run's entry in
    ``filenames`` is passed to ``TSeqCounter``, ``LSeqCounter`` and
    ``barcodechecker``.  The three results are then stored under the run
    index in ``TSeqNums``, ``LSeqNums`` and ``Barcodevalues`` respectively.
    """
    for run_index in range(1, numofruns + 1):
        # Compute all three values before storing any of them, so a failure
        # in a later helper leaves no partial entry for this run.
        t_count = TSeqCounter(filenames[run_index])
        l_count = LSeqCounter(filenames[run_index])
        barcode_value = barcodechecker(filenames[run_index])

        TSeqNums[run_index] = t_count
        LSeqNums[run_index] = l_count
        Barcodevalues[run_index] = barcode_value
Example #2
0
def listinit(mink, maxk):
    """Initialise the per-run position tables ``poslist`` and ``rposlist``.

    One empty dict is appended to each table for every index from 0 through
    ``numofruns``.  For each index whose barcode length can be determined,
    the dict is filled as ``table[i][k][x] = []`` for every ``k`` in
    ``mink..maxk`` and every window offset ``x`` in
    ``range(l + 1 - k - 2 * avg)``.

    Args:
        mink: Smallest k-mer length to create a sub-table for.
        maxk: Largest k-mer length (inclusive) to create a sub-table for.
    """
    # NOTE(review): the length of run 1 is used for every run; confirm this
    # is intended rather than lvalues[i].
    l = lvalues[1]
    for i in range(numofruns + 1):
        poslist.append({})
        rposlist.append({})
        try:
            avg = barcodechecker(filenames[i])
        except Exception:
            # Best-effort: an index with no usable file (presumably index 0,
            # since other code starts runs at 1) keeps its empty placeholder.
            # Only ordinary errors are skipped; KeyboardInterrupt and
            # SystemExit still propagate.
            continue
        for k in range(mink, maxk + 1):
            npositions = l + 1 - k - (2 * avg)
            # A fresh list per offset so that no two offsets share storage.
            poslist[i][k] = {x: [] for x in range(npositions)}
            rposlist[i][k] = {x: [] for x in range(npositions)}
Example #3
0
def listinit():
    """Build the nested per-run / per-k / per-motif result tables.

    Every table (``poslist``, ``rposlist``, ``consensuslist``,
    ``cooccurencelist``, ``cooccurenceindexlist``, ``gaplist``, ``gapstdev``)
    first receives ``numofruns + 1`` empty dicts.  Then, for each run
    ``1..numofruns``, each ``k`` in ``mink..maxk`` and each motif
    ``1..nmotifs``, the entry ``table[run][k][motif]`` is created:

    * ``consensuslist``, ``cooccurencelist``, ``cooccurenceindexlist``: ``{}``
    * ``gaplist``: ``[]``
    * ``gapstdev``: ``{"mean": 0, "stdev": 0}``
    * ``poslist`` / ``rposlist``: when ``extype == "SELEX"``, a dict mapping
      every offset in ``range(l + 1 - k - (avg5 + avg3))`` to ``0``;
      otherwise an empty list.
    """
    tables = (
        poslist,
        rposlist,
        consensuslist,
        cooccurencelist,
        cooccurenceindexlist,
        gaplist,
        gapstdev,
    )
    for _ in range(numofruns + 1):
        for table in tables:
            table.append({})

    for run in range(1, numofruns + 1):
        if extype == "SELEX":
            read_len = lvalues[run]

        # Explicit == comparisons (not truthiness): a flag that equals
        # neither True nor False selects no branch at all.
        if knownbarcode == True:
            avg5 = len(barcodeprimers53[run][0])
            avg3 = len(barcodeprimers53[run][1])
        elif knownbarcode == False:
            if nobarcode == True:
                avg5 = avg3 = 0
            elif nobarcode == False:
                avg5 = barcodechecker(filenames[run])
                avg3 = avg5

        for k in range(mink, maxk + 1):
            for table in tables:
                table[run][k] = {}

            for motif in range(1, nmotifs + 1):
                consensuslist[run][k][motif] = {}
                cooccurencelist[run][k][motif] = {}
                cooccurenceindexlist[run][k][motif] = {}
                gaplist[run][k][motif] = []
                gapstdev[run][k][motif] = {"mean": 0, "stdev": 0}

                if extype == "SELEX":
                    offsets = range(0, read_len + 1 - k - (avg5 + avg3))
                    poslist[run][k][motif] = dict.fromkeys(offsets, 0)
                    rposlist[run][k][motif] = dict.fromkeys(offsets, 0)
                else:
                    poslist[run][k][motif] = []
                    rposlist[run][k][motif] = []
Example #4
0
def seqwtfbsfinder(FileName, k):
    """Count the reads in *FileName* that contain at least one k-mer hit.

    A read is considered only if its stripped length equals the expected
    length for the file's run (``lvalues[ufilenames[FileName]]``) and it
    contains no ``"N"``.  Lines starting with ``">"`` are skipped.  Windows of
    length *k* are taken between the barcode margins (``avg`` characters on
    each side, as returned by ``barcodechecker``).  A window is a hit when it
    occurs exactly once in the read and its hash is in
    ``hamminglist2[run][k]``.  When ``revcompwanted`` is set, a window's
    reverse complement is also a hit when the window and its reverse
    complement together occur exactly once in the read and the reverse
    complement's hash is in the same set.

    Args:
        FileName: Path of the sequence file; also the key into ``ufilenames``.
        k: Window (k-mer) length.

    Returns:
        The number of reads with at least one hit; each read is counted once.
    """
    TFBSSeqNum = 0
    runnum = ufilenames[FileName]
    l = lvalues[runnum]
    # The context manager closes the file even if a helper raises.
    with open(FileName, "r") as fastaFile:
        avg = barcodechecker(FileName)
        for line in fastaFile:
            line = line.strip()
            if line.startswith(">"):
                continue
            if len(line) != l or "N" in line:
                continue

            counted = False  # ensures the read contributes at most once
            nwindows = (len(line) + 1) - k - (2 * avg)

            for x in range(nwindows):
                kmers = line[x + avg:x + k + avg]
                if kmers and line.count(kmers) == 1:
                    if kmer2hash(kmers) in hamminglist2[runnum][k]:
                        if not counted:
                            TFBSSeqNum += 1
                            counted = True

            if revcompwanted:
                for x in range(nwindows):
                    # Recompute the forward window for this offset so the
                    # uniqueness test compares the matching pair.
                    kmers = line[x + avg:x + k + avg]
                    rkmers = revComp(kmers)
                    if rkmers and (line.count(kmers) + line.count(rkmers)) == 1:
                        if kmer2hash(rkmers) in hamminglist2[runnum][k]:
                            if not counted:
                                TFBSSeqNum += 1
                                counted = True
    return TFBSSeqNum
Example #5
0
def CreatePosList(FileName, k, runnum):
    """Record, per window offset, the hashed k-mers that hit in *FileName*.

    A read is considered only if its stripped length equals
    ``lvalues[runnum]`` and it contains no ``"N"``.  Lines starting with
    ``">"`` are skipped.  Windows of length *k* are taken between the barcode
    margins (``avg`` characters on each side, as returned by
    ``barcodechecker``).  For each window offset ``x``:

    * if the window occurs exactly once in the read and its hash is in
      ``hamminglist2[runnum][k]``, the hash is appended to
      ``poslist[runnum][k][x]``;
    * when ``revcompwanted`` is set, if the window and its reverse complement
      together occur exactly once in the read and the reverse complement's
      hash is in ``hamminglist2[runnum][k]``, that hash is appended to
      ``rposlist[runnum][k][x]``.

    Args:
        FileName: Path of the sequence file to scan.
        k: Window (k-mer) length.
        runnum: Run index used to address the global tables.
    """
    # The context manager closes the file even if a helper raises.
    with open(FileName, "r") as fastaFile:
        avg = barcodechecker(FileName)
        for line in fastaFile:
            line = line.strip()
            if line.startswith(">"):
                continue
            if len(line) != lvalues[runnum] or "N" in line:
                continue

            nwindows = (len(line) + 1) - k - (2 * avg)

            for x in range(nwindows):
                kmers = line[x + avg:x + k + avg]
                if kmers and line.count(kmers) == 1:
                    hkmers = kmer2hash(kmers)
                    if hkmers in hamminglist2[runnum][k]:
                        poslist[runnum][k][x].append(hkmers)

            if revcompwanted:
                for x in range(nwindows):
                    kmers = line[x + avg:x + k + avg]
                    rkmers = revComp(kmers)
                    if rkmers and (line.count(rkmers) + line.count(kmers)) == 1:
                        hrkmers = kmer2hash(rkmers)
                        if hrkmers in hamminglist2[runnum][k]:
                            rposlist[runnum][k][x].append(hrkmers)
Example #6
0
def _min_motif_gap(forward_hits, reverse_hits, k):
    """Return the smallest forward/reverse offset distance greater than *k*.

    Both arguments are lists of window offsets in ascending order.  They are
    walked together, always advancing the side whose current offset is
    smaller (the reverse side on ties).  The result is the smallest absolute
    difference seen that is strictly greater than *k* and strictly below
    1000, or 1000 when no such pair was seen.
    """
    mindiff = 1000
    a = 0
    b = 0
    while a < len(forward_hits) and b < len(reverse_hits):
        diff = abs(forward_hits[a] - reverse_hits[b])
        if k < diff < mindiff:
            mindiff = diff
        if forward_hits[a] < reverse_hits[b]:
            a += 1
        else:
            b += 1
    return mindiff


def CreatePosListNORMAL(FileName, k, runnum):
    """Scan every record of *FileName* for motif hits and tally the results.

    The file type is taken from the first line (``">"`` means FASTA, ``"@"``
    means FASTQ) and the records are read with ``SeqIO.parse``.  The barcode
    margins ``avg5`` / ``avg3`` come from ``barcodeprimers53`` when
    ``knownbarcode`` is set, are zero when ``nobarcode`` is set, and
    otherwise both equal ``barcodechecker(FileName)``.

    For each motif ``n`` in ``1..nmotifs`` and each record, every window of
    length *k* between the margins is hashed and then:

    * if the hash is in ``hamminglist2[runnum][k][n]``, the window's relative
      offset (offset divided by the number of windows) is appended to
      ``poslist[runnum][k][n]``;
    * if it is in ``rhamminglist2[runnum][k][n]``, the relative offset is
      appended to ``rposlist[runnum][k][n]``;
    * the first such hit in a record increments ``numoftfbsseq[runnum][k]``;
    * offsets whose hash equals the motif consensus, or its reverse
      complement, are remembered for the co-occurrence step.

    After the windows of a record are processed,
    ``cooccurencelist[runnum][k][n]`` is updated.  ``"f"`` or ``"r"`` is
    incremented when only the consensus or only its reverse complement was
    hit.  When both were hit and their closest non-overlapping distance
    ``d`` satisfies ``k < d <= k + 10``, ``"fr"`` is incremented and
    ``d - k`` is appended to ``gaplist[runnum][k][n]``.

    ``LSeqNums[runnum]`` and ``TSeqNums[runnum]`` are incremented once per
    record per motif.
    NOTE(review): with several motifs each record is therefore counted
    ``nmotifs`` times; confirm this is intended.

    Args:
        FileName: Path of the FASTA/FASTQ file to scan.
        k: Window (k-mer) length.
        runnum: Run index used to address the global tables.

    Raises:
        ValueError: If records would be parsed but the first line starts
            with neither ``">"`` nor ``"@"``.
    """
    # Only the first line is needed here; close the handle right away.
    with open(FileName, "r") as fastaFile:
        firstline = fastaFile.readline().strip()

    filetype = None
    if firstline.startswith(">"):
        filetype = "fasta"
    elif firstline.startswith("@"):
        filetype = "fastq"

    if knownbarcode:
        avg5 = len(barcodeprimers53[runnum][0])
        avg3 = len(barcodeprimers53[runnum][1])
    elif nobarcode:
        avg5 = 0
        avg3 = 0
    else:
        avg5 = barcodechecker(FileName)
        avg3 = avg5

    if filetype is None and nmotifs >= 1:
        raise ValueError(
            "cannot determine file type of %r: first line starts with "
            "neither '>' nor '@'" % (FileName,)
        )

    for n in range(1, nmotifs + 1):
        seq1 = kmer2hash(consensuslist[runnum][k][n])
        seq2 = kmer2hash(revComp(consensuslist[runnum][k][n]))
        for sequence in SeqIO.parse(FileName, filetype):
            line = str(sequence.seq)
            # Number of windows that fit between the two barcode margins.
            nwindows = (len(line) + 1 - k) - (avg5 + avg3)
            LSeqNums[runnum] += 1
            TSeqNums[runnum] += 1
            counted = False  # record contributes to numoftfbsseq only once
            done = set()
            noisefilter = {"F": [], "R": []}

            for x in range(nwindows):
                try:
                    kmers = kmer2hash(line[x + avg5:x + k + avg5])
                    if kmers in hamminglist2[runnum][k][n]:
                        poslist[runnum][k][n].append(x / nwindows)
                        done.add(kmers)
                        if not counted:
                            numoftfbsseq[runnum][k] += 1
                            counted = True
                    if kmers in rhamminglist2[runnum][k][n]:
                        rposlist[runnum][k][n].append(x / nwindows)
                        done.add(kmers)
                        if not counted:
                            numoftfbsseq[runnum][k] += 1
                            counted = True
                    if kmers == seq1:
                        noisefilter["F"].append(x)
                    if kmers == seq2:
                        noisefilter["R"].append(x)
                except Exception:
                    # Best-effort: a window that cannot be processed is
                    # skipped; KeyboardInterrupt/SystemExit still propagate.
                    continue

            # Tallied once per record, after all windows, so a failure on the
            # last window cannot suppress it.
            has_forward = seq1 in done
            has_reverse = seq2 in done
            if has_forward and has_reverse:
                mindiff = _min_motif_gap(
                    noisefilter["F"], noisefilter["R"], k
                )
                if k < mindiff <= (k + 10):
                    cooccurencelist[runnum][k][n]["fr"] += 1
                    gaplist[runnum][k][n].append(mindiff - k)
            elif has_forward:
                cooccurencelist[runnum][k][n]["f"] += 1
            elif has_reverse:
                cooccurencelist[runnum][k][n]["r"] += 1