def countKmersInitialized(seq, kLength, kmers):
    """Count occurrences in ``seq`` of the words already present in ``kmers``.

    Every window of ``kLength`` characters in ``seq`` is looked up in
    ``kmers``.  If the window itself is a key, its count is incremented;
    otherwise the result of ``aabpPyLib.reverseComplement(window, 'DNA')``
    is looked up and, if present, that key is incremented instead.  Windows
    found under neither spelling are ignored: no new keys are ever added.

    Args:
        seq: Sequence to scan (sliced with ``seq[i:i + kLength]``).
        kLength: Window length.
        kmers: Dict mapping word -> count.  It is updated in place.

    Returns:
        The same ``kmers`` dict that was passed in.
    """
    lastStart = len(seq) - kLength
    i = 0
    while i <= lastStart:
        word = seq[i:i + kLength]
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if word in kmers:
            kmers[word] += 1
        else:
            # Only computed when the forward spelling misses, and only once.
            reverseWord = aabpPyLib.reverseComplement(word, 'DNA')
            if reverseWord in kmers:
                kmers[reverseWord] += 1
        i += 1
    return kmers
def countKmersFile(filename, kLength):
    """Count every word of length ``kLength`` in the sequences of a file.

    The file is loaded with ``inputFastaSeq(filename)``; the result is
    iterated and indexed like a mapping (presumably identifier -> sequence;
    confirm against ``inputFastaSeq``).  For each window of ``kLength``
    characters:

    * if the window is already a key, its count is incremented;
    * otherwise, if ``aabpPyLib.reverseComplement(window, 'DNA')`` is
      already a key, that key's count is incremented;
    * otherwise the window is added as a new key with count 1.

    A word and its reverse complement therefore share one counter, stored
    under whichever spelling was seen first.

    Args:
        filename: Passed unchanged to ``inputFastaSeq``.
        kLength: Window length.

    Returns:
        A new dict mapping word -> count.
    """
    kmers = {}
    fasta = inputFastaSeq(filename)
    for entry in fasta:
        sequence = fasta[entry]
        lastStart = len(sequence) - kLength
        i = 0
        while i <= lastStart:
            word = sequence[i:i + kLength]
            # `in` replaces dict.has_key(), which no longer exists in Python 3.
            if word in kmers:
                kmers[word] += 1
            else:
                # Only computed when the forward spelling misses, and only once.
                reverseWord = aabpPyLib.reverseComplement(word, 'DNA')
                if reverseWord in kmers:
                    kmers[reverseWord] += 1
                else:
                    kmers[word] = 1
            i += 1
    return kmers
def makeIsoform(kLength, seqLength, seqPortion, seqOriginal):
    """Build a random sequence that starts with ``seqPortion`` and differs
    from ``seqOriginal``.

    Starting from ``seqPortion``, uniformly random characters from ``ACGT``
    are appended until the sequence is at least ``seqLength`` long.  If the
    result happens to equal ``seqOriginal`` it is discarded and rebuilt.

    NOTE: this file defines ``makeIsoform`` twice with the same logic; Python
    keeps whichever definition executes last.

    Args:
        kLength: Unused.  Kept so existing callers keep working; no
            repeat filtering based on word length is performed here.
        seqLength: Minimum length of the returned sequence.
        seqPortion: Prefix of the returned sequence.  Returned unchanged when
            it is already ``seqLength`` or longer.
        seqOriginal: Sequence the result must not be equal to.

    Returns:
        The generated sequence.

    Raises:
        ValueError: If ``seqPortion`` equals ``seqOriginal`` and is already
            ``seqLength`` or longer, because then nothing can be appended
            and no different sequence can ever be produced.
    """
    if seqPortion == seqOriginal and len(seqPortion) >= seqLength:
        raise ValueError(
            'seqPortion equals seqOriginal and cannot be extended; '
            'no distinct isoform exists')

    alphabet = 'ACGT'
    data = seqOriginal
    while data == seqOriginal:
        data = seqPortion
        while len(data) < seqLength:
            # randint (rather than random.choice) keeps the same draws as
            # before for callers that seed the random module.
            data = data + alphabet[random.randint(0, len(alphabet) - 1)]
    return data
def makeSingleKmerPlusOneCountData(kLength, seqLength, repSeq, Ns):
    """Extend ``repSeq`` with random characters, rejecting repeated words.

    Characters are drawn uniformly from ``ACGT`` (or ``ACGTN`` when ``Ns`` is
    true) and appended to ``repSeq`` until the sequence reaches
    ``seqLength``.  While the sequence is shorter than ``kLength`` every
    draw is accepted.  After that a draw is accepted only if the candidate
    word -- the ``kLength`` characters that precede the last character,
    followed by the drawn character -- does not already occur in the
    sequence, and neither does
    ``aabpPyLib.reverseComplement(candidate, 'DNA')``.

    NOTE(review): the candidate word skips the current last character
    (slice ``[-kLength - 1:-1]``).  Confirm whether ``data[-kLength:]`` was
    intended; the original slice is preserved here.

    NOTE(review): a rejected draw is simply retried, so the loop does not
    terminate if every character of the alphabet is rejected.

    NOTE: this file defines this function twice with the same logic; Python
    keeps whichever definition executes last.

    Args:
        kLength: Word length used for the repeat check.
        seqLength: Target length of the returned sequence.
        repSeq: Starting sequence; returned unchanged when it is already
            ``seqLength`` or longer.
        Ns: When true, ``N`` is included in the alphabet.

    Returns:
        The extended sequence.
    """
    alphabet = 'ACGTN' if Ns else 'ACGT'
    data = repSeq
    while len(data) < seqLength:
        newNuc = alphabet[random.randint(0, len(alphabet) - 1)]
        if len(data) < kLength:
            # Too short to form a candidate word yet: accept unconditionally.
            data = data + newNuc
            continue
        candidate = data[-kLength - 1:-1] + newNuc
        if candidate in data:
            continue
        if aabpPyLib.reverseComplement(candidate, 'DNA') in data:
            continue
        data = data + newNuc
    return data
def makeIsoform(kLength, seqLength, seqPortion, seqOriginal):
    """Build a random sequence that starts with ``seqPortion`` and differs
    from ``seqOriginal``.

    Starting from ``seqPortion``, uniformly random characters from ``ACGT``
    are appended until the sequence is at least ``seqLength`` long.  If the
    result happens to equal ``seqOriginal`` it is discarded and rebuilt.

    NOTE: this file defines ``makeIsoform`` twice with the same logic; Python
    keeps whichever definition executes last.

    Args:
        kLength: Unused.  Kept so existing callers keep working; no
            repeat filtering based on word length is performed here.
        seqLength: Minimum length of the returned sequence.
        seqPortion: Prefix of the returned sequence.  Returned unchanged when
            it is already ``seqLength`` or longer.
        seqOriginal: Sequence the result must not be equal to.

    Returns:
        The generated sequence.

    Raises:
        ValueError: If ``seqPortion`` equals ``seqOriginal`` and is already
            ``seqLength`` or longer, because then nothing can be appended
            and no different sequence can ever be produced.
    """
    if seqPortion == seqOriginal and len(seqPortion) >= seqLength:
        raise ValueError(
            'seqPortion equals seqOriginal and cannot be extended; '
            'no distinct isoform exists')

    alphabet = 'ACGT'
    data = seqOriginal
    while data == seqOriginal:
        data = seqPortion
        while len(data) < seqLength:
            # randint (rather than random.choice) keeps the same draws as
            # before for callers that seed the random module.
            data = data + alphabet[random.randint(0, len(alphabet) - 1)]
    return data
def makeSingleKmerPlusOneCountData(kLength, seqLength, repSeq, Ns):
    """Extend ``repSeq`` with random characters, rejecting repeated words.

    Characters are drawn uniformly from ``ACGT`` (or ``ACGTN`` when ``Ns`` is
    true) and appended to ``repSeq`` until the sequence reaches
    ``seqLength``.  While the sequence is shorter than ``kLength`` every
    draw is accepted.  After that a draw is accepted only if the candidate
    word -- the ``kLength`` characters that precede the last character,
    followed by the drawn character -- does not already occur in the
    sequence, and neither does
    ``aabpPyLib.reverseComplement(candidate, 'DNA')``.

    NOTE(review): the candidate word skips the current last character
    (slice ``[-kLength - 1:-1]``).  Confirm whether ``data[-kLength:]`` was
    intended; the original slice is preserved here.

    NOTE(review): a rejected draw is simply retried, so the loop does not
    terminate if every character of the alphabet is rejected.

    NOTE: this file defines this function twice with the same logic; Python
    keeps whichever definition executes last.

    Args:
        kLength: Word length used for the repeat check.
        seqLength: Target length of the returned sequence.
        repSeq: Starting sequence; returned unchanged when it is already
            ``seqLength`` or longer.
        Ns: When true, ``N`` is included in the alphabet.

    Returns:
        The extended sequence.
    """
    alphabet = 'ACGTN' if Ns else 'ACGT'
    data = repSeq
    while len(data) < seqLength:
        newNuc = alphabet[random.randint(0, len(alphabet) - 1)]
        if len(data) < kLength:
            # Too short to form a candidate word yet: accept unconditionally.
            data = data + newNuc
            continue
        candidate = data[-kLength - 1:-1] + newNuc
        if candidate in data:
            continue
        if aabpPyLib.reverseComplement(candidate, 'DNA') in data:
            continue
        data = data + newNuc
    return data