def helper(b, difference=difference1):
    '''Interactively pick words that give English-looking text on the other side.

    ``b`` maps a start index to a "could be english" string (an ngram).
    For every entry, the words returned by ``findwordswithsequence`` that are
    longer than 5 characters are aligned so that the ngram sits at its start
    index, combined with ``difference`` through ``addwordtodatindex``, and
    checked with ``couldbeenglish``. Words that pass and are present in
    ``popularity_dict`` are shown to the user, who confirms them on stdin.

    Returns a dict mapping word start index -> accepted (lower-case) word.

    Example:
        "renewal"   -> "ending i"
        "[s]ending" -> " renew"
    '''
    accepted = {}

    for key in b:
        ngram = b[key]
        # The search below uses the upper-cased ngram; presumably the words
        # from findwordswithsequence are upper-case -- TODO confirm.
        ngram_upper = ngram.upper()

        # Only words longer than 5 characters are considered.
        candidates = [w for w in findwordswithsequence(ngram) if len(w) > 5]

        for candidate in candidates:
            # Index at which the whole word has to start so that the ngram
            # lands on position `key`.
            # NOTE(review): str.find returns -1 when the ngram is absent,
            # which yields key + 1 here -- confirm this is intended.
            start = key - candidate.find(ngram_upper)
            if start < 0:
                continue

            word = candidate.lower()
            dif = addwordtodatindex(difference, word, start)

            # Explicit comparison with True is kept on purpose; the
            # popularity lookup only happens when the check passed.
            if not (couldbeenglish(dif) == True) or word not in popularity_dict:
                continue

            print(dif, 'wordofsequence is:', word)
            permission = input(
                "Do you give permission to the combi above? (y/n)")
            if permission != "y":
                continue

            # Several candidates can end up at the same start index; an
            # existing entry is only overwritten after a second confirmation.
            if start in accepted:
                print(accepted[start])
                replace = input("Do you want to replace? (y/n)")
                if replace != 'y':
                    continue

            accepted[start] = word

    return accepted
def test_couldbeenglish_capitalafterpunct(self):
    """"ing.Wi" (capital right after a period) is not English with simple=True."""
    outcome = couldbeenglish("ing.Wi", simple=True)
    self.assertEqual(outcome, False)
def test_couldbeenglish_I(self):
    """The single capital letter "I" is accepted with simple=True."""
    outcome = couldbeenglish("I", simple=True)
    self.assertEqual(outcome, True)
def test_couldbeenglish_numberinbetween(self):
    """"in9, wi" (a digit inside the letters) is rejected with simple=True."""
    outcome = couldbeenglish("in9, wi", simple=True)
    self.assertEqual(outcome, False)
def test_couldbeenglish_dubblepunct(self):
    """"ing,. wi" (two punctuation marks in a row) is rejected with simple=True."""
    outcome = couldbeenglish("ing,. wi", simple=True)
    self.assertEqual(outcome, False)
def test_couldbeenglish_capital(self):
    """"inG, wi" (a capital at the end of a word) is rejected with simple=True."""
    outcome = couldbeenglish("inG, wi", simple=True)
    self.assertEqual(outcome, False)
def test_couldbeenglish_good(self):
    """"ing, wi" is accepted as possible English with simple=True."""
    outcome = couldbeenglish("ing, wi", simple=True)
    self.assertEqual(outcome, True)