def peptideEncoding(dna, protein):
    """
    Find the substrings of dna which encode the protein.

    Each symbol of ``protein`` is expanded to every codon that ``rnaCoding``
    maps to it (with 'U' rewritten as 'T'); every combination of those
    codons is a candidate motif, and ``reverseComplement`` of each candidate
    is accepted as well.

    NOTE(review): assumes ``rnaCoding`` is a dict of RNA codon -> amino-acid
    symbol defined elsewhere in this module -- confirm.

    Returns a list of the matching windows of length ``3 * len(protein)``,
    in the order they occur in ``dna`` (one entry per occurrence).
    """
    reverseLookup = defaultdict(list)
    # items() behaves the same as iteritems() here and also runs on Python 3.
    for codon, aminoAcid in rnaCoding.items():
        reverseLookup[aminoAcid].append(codon.replace('U', 'T'))

    forwardMotifs = [
        "".join(codons)
        for codons in product(*[reverseLookup[p] for p in protein])
    ]
    # A set gives constant-time membership tests while scanning dna.
    motifs = set(forwardMotifs)
    motifs.update(reverseComplement(m) for m in forwardMotifs)

    motifLength = 3 * len(protein)
    matches = []
    # "+ 1" so the window that ends on the last base of dna is examined too.
    for i in range(len(dna) - motifLength + 1):
        window = dna[i:i + motifLength]
        if window in motifs:
            matches.append(window)
    return matches
def syntegenyBlockConstruction(kLength, sequence1, sequence2):
    """
    Yield the positions of k-mers shared between two sequences.

    A pair ``(pos1, pos2)`` is yielded when the ``kLength``-mer starting at
    ``pos2`` in ``sequence2`` equals the ``kLength``-mer starting at ``pos1``
    in ``sequence1`` or the ``reverseComplement`` of that k-mer.

    Pairs are produced in increasing ``pos2`` order; for a fixed ``pos2`` the
    ``pos1`` values appear in the order they were indexed. Each pair is
    yielded once.
    """
    kmerPos = defaultdict(list)
    for i in range(len(sequence1) - kLength + 1):
        kmer = sequence1[i:i + kLength]
        kmerPos[kmer].append(i)
        complement = reverseComplement(kmer)
        # A k-mer that is its own reverse complement must be indexed only
        # once, otherwise every match on it is reported twice.
        if complement != kmer:
            kmerPos[complement].append(i)

    for j in range(len(sequence2) - kLength + 1):
        # .get() avoids inserting an empty list for every unmatched k-mer.
        for pos in kmerPos.get(sequence2[j:j + kLength], ()):
            yield (pos, j)
def syntegenyBlockConstruction(kLength, sequence1, sequence2):
    """
    Generate (position in sequence1, position in sequence2) pairs whose
    k-mers of length ``kLength`` match directly or via ``reverseComplement``.

    The k-mers of ``sequence1`` are indexed first, each under both its own
    text and its reverse complement; ``sequence2`` is then scanned left to
    right and every indexed position for the current k-mer is reported.
    No pair is reported more than once.
    """
    kmerPos = defaultdict(list)
    lastStart1 = len(sequence1) - kLength
    for start in range(lastStart1 + 1):
        kmer = sequence1[start:start + kLength]
        # Using a set of keys collapses the two keys into one when the k-mer
        # is its own reverse complement, so the position is stored only once
        # and matches on it are not duplicated.
        for key in {kmer, reverseComplement(kmer)}:
            kmerPos[key].append(start)

    lastStart2 = len(sequence2) - kLength
    for start in range(lastStart2 + 1):
        kmer = sequence2[start:start + kLength]
        # Membership test first: indexing the defaultdict directly would add
        # an empty entry for every k-mer of sequence2 that has no match.
        if kmer in kmerPos:
            for pos in kmerPos[kmer]:
                yield (pos, start)
def peptideEncoding(dna, protein):
    """
    Find motifs in the positions of dna which generate the protein.

    Candidate motifs are built by replacing each symbol of ``protein`` with
    every codon ``rnaCoding`` assigns to it ('U' written as 'T') and joining
    all combinations; the ``reverseComplement`` of each candidate is also a
    valid motif.

    NOTE(review): ``rnaCoding`` is presumably the module-level codon ->
    amino-acid table -- verify against its definition.

    Returns the list of windows of ``dna`` (length ``3 * len(protein)``)
    that are valid motifs, ordered by start position.
    """
    reverseLookup = defaultdict(list)
    # items() is available on Python 2 and 3 dicts alike.
    for k, v in rnaCoding.items():
        reverseLookup[v].append(k.replace('U', 'T'))

    forward = [
        "".join(prod)
        for prod in product(*[reverseLookup[p] for p in protein])
    ]
    # Set membership keeps the scan below linear in len(dna).
    motifs = set(forward) | set(reverseComplement(m) for m in forward)

    motifLength = 3 * len(protein)
    # The last valid window starts at len(dna) - motifLength, hence "+ 1";
    # without it a match at the very end of dna was silently dropped.
    windows = (
        dna[i:i + motifLength]
        for i in range(len(dna) - motifLength + 1)
    )
    return [window for window in windows if window in motifs]
def testReverseComplement2(self):
    # "CCCGGG" is expected to come back unchanged from hw.reverseComplement.
    sequence = "CCCGGG"
    actual = hw.reverseComplement(sequence)
    self.assertEqual(actual, "CCCGGG")
def testReverseComplement1(self):
    # Non-palindromic input: the result must be both complemented and
    # reversed ("GATTACA" -> "TGTAATC").
    actual = hw.reverseComplement("GATTACA")
    expected = "TGTAATC"
    self.assertEqual(actual, expected)
def testReverseComplement(self):
    # Checks reverseComplement on a ten-base sample; the expected value is
    # passed first, matching the original argument order.
    expected = 'ACCGGGTTTT'
    self.assertEqual(expected, reverseComplement('AAAACCCGGT'))
def Find(self, sequence):
    """
    Locate matches of ``self.regularExpression`` on both strands.

    The pattern is searched in ``sequence`` and in
    ``reverseComplement(sequence)``. Returns a list of
    ``(start, end, strand)`` tuples: forward hits first with strand 0,
    then reverse-complement hits with strand 1. Reverse hits have their
    coordinates flipped against ``len(sequence)``.

    NOTE(review): the flip assumes reverseComplement preserves length --
    confirm.
    """
    seqLen = len(sequence)
    revComp = reverseComplement(sequence)
    pattern = self.regularExpression

    hits = []
    for match in re.finditer(pattern, sequence):
        hits.append((match.start(), match.end(), 0))
    for match in re.finditer(pattern, revComp):
        # A span [s, e) on the reverse strand is [seqLen - e, seqLen - s)
        # when measured from the start of the forward sequence.
        hits.append((seqLen - match.end(), seqLen - match.start(), 1))
    return hits