예제 #1
0
def moving_win(chromosome, chrlength, winlength, k):
    """Slide a non-overlapping window along a chromosome and tabulate results.

    For every window the k-mer distribution of the window sequence is
    compared (via ``findDist``) with the profile returned by ``getMito(k)``,
    and the value of ``bpcontent.findATcontent`` is normalised by the number
    of non-``N`` characters in the window.

    Args:
        chromosome: Chromosome identifier handed to ``k_genes.get_sequence``.
        chrlength: Total length of the chromosome; scanning stops once the
            remaining length is no longer greater than ``winlength``.
        winlength: Width of each window; also the step between windows.
        k: k-mer size passed to ``getMito`` and ``kmer_distr.kmer_distr``.

    Returns:
        A space-separated table as one string, starting with the header
        ``"winS winE dist AT"``. Every row ends with a newline. Windows with
        100 or more ``N`` characters are left out of the table.
    """
    rows = ["winS winE dist AT\n"]
    mdict = getMito(k)
    winstart = 0
    winend = winlength
    while chrlength - winstart > winlength:
        seq = k_genes.get_sequence(winstart, winend, chromosome)
        n_count = seq.count('N')
        kmers = kmer_distr.kmer_distr(seq, k)
        dist = findDist(mdict, kmers)

        called = len(seq) - n_count
        if called != 0:
            # "* 1.0" forces float division on Python 2 as well.
            at_field = str(bpcontent.findATcontent(seq) / (called * 1.0))
        else:
            # No non-N characters to normalise by: emit a placeholder so the
            # row keeps four columns and its trailing newline instead of
            # running into the next row.
            at_field = "NA"
        row = "%s %s %s %s\n" % (winstart, winend, dist, at_field)

        # Progress output: every window is printed, even those that are
        # filtered out of the returned table below.
        print(row)
        if n_count < 100:
            rows.append(row)
        winstart = winend
        winend = winstart + winlength
    return "".join(rows)
예제 #2
0
def getRMito(k):
    """Build the k-mer profile of the second strand of the MT sequence.

    The sequence for positions 1..16569 of ``'MT'`` is fetched through
    ``k_genes.get_sequence``, passed through
    ``combine_second_strand.secondStrand`` (presumably the complementary
    strand -- confirm against that module) and handed to
    ``create_kmers(m, k, True)``.

    Args:
        k: k-mer size forwarded to ``create_kmers``.

    Returns:
        Whatever ``create_kmers`` returns for the converted sequence.

    Raises:
        IOError/OSError: if the MT FASTA file under ``GOLDEN_PATH_DIR``
            cannot be opened.
    """
    # The FASTA contents are never read here; the open is kept only so that a
    # missing file still fails at this point. The handle is now closed
    # immediately instead of leaking, and plain 'r' replaces 'rU', which
    # Python 3.11+ rejects.
    with open(GOLDEN_PATH_DIR + "Homo_sapiens.GRCh38.dna.chromosome.MT.fa",
              'r'):
        pass
    m = combine_second_strand.secondStrand(
        k_genes.get_sequence(1, 16569, 'MT'))
    return create_kmers(m, k, True)
예제 #3
0
def marKer(k, chrocomp):
    """Find window start positions that contain many selected MT k-mers.

    The k-mers whose value in ``getMito(k)`` exceeds 0.01 are selected
    (presumably the most frequent ones -- the meaning of the values depends
    on ``create_kmers``). The chromosome is then scanned with windows of
    width 3000 that advance by 1500, and a window start is recorded when
    more than 5 of the selected k-mers occur in that window.

    Args:
        k: k-mer size forwarded to ``getMito``.
        chrocomp: Chromosome identifier passed to ``getWhole`` and
            ``k_genes.get_sequence``.

    Returns:
        List of window start positions that passed the threshold.
    """
    mito_profile = getMito(k)
    selected = [mer for mer in mito_profile if mito_profile[mer] > 0.01]
    print(selected)

    hits = []
    total = len(getWhole(chrocomp))
    lo, hi = 0, 3000
    while hi < total:
        window = k_genes.get_sequence(lo, hi, chrocomp)
        # Count how many selected k-mers appear at least once in the window.
        matched = sum(1 for mer in selected if window.find(mer) != -1)
        print(matched)
        print(lo)
        if matched > 5:
            hits.append(lo)
        lo += 1500
        hi += 1500
    return hits
예제 #4
0
def comp_mit(chromosome, chrlength, winlength, k):
    """Compare overlapping chromosome windows with both MT strand profiles.

    Each window's k-mers (``create_kmers(seq, k)``) are compared through
    ``findDist`` with the profiles from ``getMito(k)`` and ``getRMito(k)``.
    The smaller distance is reported, tagged ``+`` when the ``getMito``
    distance is less than or equal to the ``getRMito`` one and ``-``
    otherwise.

    The window normally advances by half its width. When the reported
    distance is below 3 it advances by a tenth of its width instead, so the
    surrounding region is sampled more densely.

    Args:
        chromosome: Chromosome identifier handed to ``k_genes.get_sequence``.
        chrlength: Total length of the chromosome; scanning stops once the
            remaining length is no longer greater than ``winlength``.
        winlength: Width of each window.
        k: k-mer size.

    Returns:
        A space-separated table as one string, starting with the header
        ``"winS winE dist strand"``. Windows with 100 or more ``N``
        characters are left out.
    """
    rows = ["winS winE dist strand\n"]
    mf = getMito(k)
    mr = getRMito(k)

    # Floor division keeps window coordinates integral under Python 3 (true
    # division would yield floats). The floor of 1 guarantees progress for
    # very small windows, where the step would otherwise be 0 and the loop
    # would never terminate.
    short_step = max(1, winlength // 10)
    long_step = max(1, winlength // 2)

    winstart = 0
    winend = winlength
    while chrlength - winstart > winlength:
        seq = k_genes.get_sequence(winstart, winend, chromosome)
        kmers = create_kmers(seq, k)
        distF = findDist(kmers, mf)
        distR = findDist(kmers, mr)
        if distF <= distR:
            dist, strand = distF, "+"
        else:
            dist, strand = distR, "-"

        if seq.count('N') < 100:
            rows.append("%s %s %s %s\n" % (winstart, winend, dist, strand))

        step = short_step if dist < 3 else long_step
        winstart += step
        winend += step
    return "".join(rows)
예제 #5
0
def getMito(k):
    """Build the k-mer profile of the MT sequence.

    The sequence for positions 1..16569 of ``'MT'`` is fetched through
    ``k_genes.get_sequence`` and handed to ``create_kmers(m, k, True)``.

    Args:
        k: k-mer size forwarded to ``create_kmers``.

    Returns:
        Whatever ``create_kmers`` returns for the MT sequence.

    Raises:
        IOError/OSError: if the MT FASTA file under ``GOLDEN_PATH_DIR``
            cannot be opened.
    """
    # The FASTA contents are never read here; the open is kept only so that a
    # missing file still fails at this point. The handle is now closed
    # immediately instead of leaking, and plain 'r' replaces 'rU', which
    # Python 3.11+ rejects.
    with open(GOLDEN_PATH_DIR + "Homo_sapiens.GRCh38.dna.chromosome.MT.fa",
              'r'):
        pass
    m = k_genes.get_sequence(1, 16569, 'MT')
    return create_kmers(m, k, True)