def moving_win(chromosome, chrlength, winlength, k):
    """Scan a chromosome in back-to-back windows and tabulate each window.

    For every window, the k-mer profile from ``kmer_distr.kmer_distr`` is
    compared against the profile returned by ``getMito(k)`` using
    ``findDist``, and the value of ``bpcontent.findATcontent`` is divided by
    the number of non-``N`` characters in the window.

    Args:
        chromosome: Identifier handed to ``k_genes.get_sequence``.
        chrlength: Chromosome length; the scan continues while
            ``chrlength - winstart > winlength``.
        winlength: Window size, which is also the step between windows.
        k: k-mer length forwarded to the k-mer helpers.

    Returns:
        A string starting with the header ``"winS winE dist AT\\n"`` followed
        by one space-separated row per window holding fewer than 100 ``N``
        characters. The AT column is ``NA`` when the window has no non-``N``
        characters.
    """
    rows = ["winS winE dist AT\n"]
    mito_profile = getMito(k)
    winstart = 0
    winend = winlength
    while chrlength - winstart > winlength:
        seq = k_genes.get_sequence(winstart, winend, chromosome)
        n_count = seq.count('N')
        window_profile = kmer_distr.kmer_distr(seq, k)
        dist = findDist(mito_profile, window_profile)

        called = len(seq) - n_count
        if called != 0:
            # float() keeps true division on Python 2 as well.
            at_field = str(bpcontent.findATcontent(seq) / float(called))
        else:
            # Without a placeholder the row would lack its last column and
            # its newline, and would merge with the following row.
            at_field = "NA"

        row = " ".join([str(winstart), str(winend), str(dist), at_field]) + "\n"
        print(row)
        # Windows dominated by unknown bases are reported but not recorded.
        if n_count < 100:
            rows.append(row)

        winstart = winend
        winend = winstart + winlength
    return "".join(rows)
def getRMito(k):
    """Build the k-mer profile of the second strand of the 'MT' sequence.

    Positions 1 through 16569 of ``'MT'`` are requested from
    ``k_genes.get_sequence``, passed through
    ``combine_second_strand.secondStrand`` and then handed to
    ``create_kmers``.

    The previous version also opened the MT FASTA file in ``'rU'`` mode
    without ever reading or closing it. That leaked a file handle and fails
    outright on Python 3.11+, where the ``'U'`` mode was removed, so the
    call has been dropped.

    Args:
        k: k-mer length forwarded to ``create_kmers``.

    Returns:
        Whatever ``create_kmers(sequence, k, True)`` returns.
    """
    # NOTE(review): secondStrand presumably yields the reverse complement;
    # confirm against combine_second_strand.
    second_strand = combine_second_strand.secondStrand(
        k_genes.get_sequence(1, 16569, 'MT'))
    return create_kmers(second_strand, k, True)
def marKer(k, chrocomp):
    """Find window starts where many frequent mitochondrial k-mers occur.

    K-mers whose value in ``getMito(k)`` exceeds 0.01 are collected, then the
    sequence named by ``chrocomp`` is scanned in 3000-long windows advanced
    by 1500. A window start is kept when more than five of the collected
    k-mers are found in that window.

    Args:
        k: k-mer length forwarded to ``getMito``.
        chrocomp: Identifier passed to ``getWhole`` and
            ``k_genes.get_sequence``.

    Returns:
        List of window start positions that passed the threshold.
    """
    mito_profile = getMito(k)
    frequent = [kmer for kmer in mito_profile if mito_profile[kmer] > 0.01]
    print(frequent)

    hits = []
    total = len(getWhole(chrocomp))
    lo, hi = 0, 3000
    while hi < total:
        window = k_genes.get_sequence(lo, hi, chrocomp)
        # Count distinct frequent k-mers present, not their occurrences.
        matched = sum(1 for kmer in frequent if window.find(kmer) != -1)
        print(matched)
        print(lo)
        if matched > 5:
            hits.append(lo)
        lo += 1500
        hi += 1500
    return hits
def comp_mit(chromosome, chrlength, winlength, k):
    """Compare overlapping windows against both mitochondrial k-mer profiles.

    Each window's k-mers (from ``create_kmers``) are scored with ``findDist``
    against the profiles from ``getMito(k)`` and ``getRMito(k)``. The smaller
    distance is reported, tagged ``+`` for the ``getMito`` profile (also on
    ties) or ``-`` for the ``getRMito`` profile.

    The window normally advances by half its length. When the reported
    distance is below 3 it advances by a tenth instead, so close matches are
    sampled more densely.

    Args:
        chromosome: Identifier handed to ``k_genes.get_sequence``.
        chrlength: Chromosome length; the scan continues while
            ``chrlength - winstart > winlength``.
        winlength: Window size.
        k: k-mer length forwarded to the k-mer helpers.

    Returns:
        A string starting with the header ``"winS winE dist strand\\n"``
        followed by one row per window holding fewer than 100 ``N``
        characters.
    """
    rows = ["winS winE dist strand\n"]
    forward_profile = getMito(k)
    reverse_profile = getRMito(k)

    # Floor division keeps coordinates integral on Python 3, where "/" would
    # turn them into floats. The clamp stops a tiny winlength from producing
    # a zero step and looping forever.
    coarse_step = max(1, winlength // 2)
    fine_step = max(1, winlength // 10)

    winstart = 0
    winend = winlength
    while chrlength - winstart > winlength:
        seq = k_genes.get_sequence(winstart, winend, chromosome)
        kmers = create_kmers(seq, k)
        dist_fwd = findDist(kmers, forward_profile)
        dist_rev = findDist(kmers, reverse_profile)

        if dist_fwd <= dist_rev:
            best, strand = dist_fwd, "+"
        else:
            best, strand = dist_rev, "-"

        # Windows dominated by unknown bases are skipped in the output.
        if seq.count('N') < 100:
            rows.append(
                str(winstart) + " " + str(winend) + " " + str(best)
                + " " + strand + "\n")

        step = fine_step if best < 3 else coarse_step
        winstart += step
        winend += step
    return "".join(rows)
def getMito(k):
    """Build the k-mer profile of the 'MT' sequence.

    Positions 1 through 16569 of ``'MT'`` are requested from
    ``k_genes.get_sequence`` and handed to ``create_kmers``.

    The previous version also opened the MT FASTA file in ``'rU'`` mode
    without ever reading or closing it. That leaked a file handle and fails
    outright on Python 3.11+, where the ``'U'`` mode was removed, so the
    call has been dropped.

    Args:
        k: k-mer length forwarded to ``create_kmers``.

    Returns:
        Whatever ``create_kmers(sequence, k, True)`` returns.
    """
    # NOTE(review): other callers start windows at 0 while this range starts
    # at 1 - confirm the coordinate convention of k_genes.get_sequence.
    mito_sequence = k_genes.get_sequence(1, 16569, 'MT')
    return create_kmers(mito_sequence, k, True)