def _longest_match_ratio(str1, str2):
    """Return the longest common block size relative to the shorter input.

    Spaces are handed to ``SequenceMatcher`` as junk. The division itself is
    delegated to ``np_utils._try_divide``.
    """
    matcher = SequenceMatcher(lambda ch: ch == " ", str1, str2)
    longest = matcher.find_longest_match(0, len(str1), 0, len(str2))
    shorter_len = min(len(str1), len(str2))
    return np_utils._try_divide(longest.size, shorter_len)
# Manupulating date time dt = datetime.now() print dt.strftime('%m - %d - %Y :: ') print dt.strftime('%x %X') exit() # Hacking difflib # stud = '1**2*' # our = '1*2' our = "*\n**\n***\n****\n" stud = " *\n * *\n * * *\n* * * *\n" space_match = 36 s = SequenceMatcher(lambda x: x == " ", stud, our) def_ratio = s.ratio() comb_len = len(stud) + len(our) def_match = def_ratio * comb_len new_match = def_match + space_match new_ratio = new_match / comb_len print "space match : ", space_match print "old ratio : ", def_ratio print "new ratio : ", new_ratio print our print stud # exit() out_ref = open('out_ref.txt')
def compareTitles(title1, title2, opt=options.matcher):
    """Decide whether two titles count as the same.

    When ``opt`` is below 1 it is used as a similarity cut-off: the titles
    match if their ``SequenceMatcher`` ratio is strictly greater than ``opt``.
    Otherwise only exact equality counts as a match.
    """
    if opt < 1:
        similarity = SequenceMatcher(None, title1, title2).ratio()
        return similarity > opt
    return title1 == title2
def avalanche(a, b):
    """Return how different ``a`` and ``b`` are, as a percentage.

    0.0 means the sequences are identical; 100.0 means nothing matched.
    """
    similarity = SequenceMatcher(None, a, b).ratio()
    return 100.0 - (similarity * 100)
def get_similarity(l1, l2):
    """Return the SequenceMatcher similarity ratio of ``l1`` and ``l2``.

    The result is a float in [0, 1]; no element is treated as junk.
    """
    matcher = SequenceMatcher(isjunk=None, a=l1, b=l2)
    return matcher.ratio()
def similar(string_A, string_B):
    """Return the similarity ratio (0.0 to 1.0) of the two strings."""
    comparison = SequenceMatcher(None, string_A, string_B)
    score = comparison.ratio()
    return score
def getSimilarRatio(str1, str2):
    """Return the SequenceMatcher ratio between ``str1`` and ``str2``.

    1.0 means the inputs are identical, 0.0 means they share nothing.
    """
    matcher = SequenceMatcher(isjunk=None, a=str1, b=str2)
    return matcher.ratio()
def _get_patterns_similarity(pattern_a, pattern_b): similarity = SequenceMatcher(None, pattern_a, pattern_b).ratio() return round(similarity, 2)
def _diff_ranges(seq1, seq2): opcodes = SequenceMatcher(a=seq1, b=seq2, autojunk=False).get_opcodes() return list(filter(lambda i: i[0] != 'equal', opcodes))
def compare(a, b):
    """Return how similar ``a`` and ``b`` are as a ratio between 0 and 1."""
    ratio = SequenceMatcher(None, a, b).ratio()
    return ratio
# Run the TF model once on random input before reading its weights.
# NOTE(review): presumably this forces the TF variables to be created — confirm.
dummy_input = tf.random.uniform((7, 80, 64), dtype=tf.float32)
mel_pred = model_tf(dummy_input, training=False)

# get tf variables
tf_vars = model_tf.weights

# match variable names with fuzzy logic
torch_var_names = list(state_dict.keys())
tf_var_names = [we.name for we in model_tf.weights]
var_map = []
for tf_name in tf_var_names:
    # skip re-mapped layer names
    if tf_name in [name[0] for name in var_map]:
        continue
    tf_name_edited = convert_tf_name(tf_name)
    # Score every still-unassigned torch name against the edited TF name.
    ratios = [SequenceMatcher(None, torch_name, tf_name_edited).ratio() for torch_name in torch_var_names]
    max_idx = np.argmax(ratios)
    matching_name = torch_var_names[max_idx]
    # Remove the chosen torch name so it cannot be matched a second time.
    del torch_var_names[max_idx]
    var_map.append((tf_name, matching_name))

# pass weights
tf_vars = transfer_weights_torch_to_tf(tf_vars, dict(var_map), state_dict)

# Compare TF and TORCH models
# check embedding outputs
model.eval()
dummy_input_torch = torch.ones((1, 80, 10))
# Build the TF input from the same values: swap the last two axes, then
# insert a new axis at position 2.
dummy_input_tf = tf.convert_to_tensor(dummy_input_torch.numpy())
dummy_input_tf = tf.transpose(dummy_input_tf, perm=[0, 2, 1])
dummy_input_tf = tf.expand_dims(dummy_input_tf, 2)
wordList_2 = [] #creates an empty list f = open("checkMe2.txt", "a") for word_1 in sentence_2.split( ): #loops through every word in the file if word_1 in text.split( ): #if word from checkMe text file is in EnglishWords text file f.write( word_1 + " " ) #then write the word to a new text file "checkMe2" correct_2 += 1 elif word_1 not in text.split(): #if word not it dictionary incorrect_2 += 1 for i in range( 0, 84094 ): #loops through every word in the EnglishWords file score1 = SequenceMatcher(None, word_1, alt[i]).ratio( ) #compares the wrong word with each word in EnglishWords file and gives a ratio of how similar the words are if score1 >= 0.75: #if the ratio similarity of the word in greater than 0.75 print( "\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" "\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500" "\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510" ) print( "\u2502 W O R D N O T F O U N D \u2502" ) print( "\u2502 \u2502" ) print("\u2502 " + word_1, (" " * (23 - len(word_1))), " \u2502")
def transfer_casing_for_similar_text(text_w_casing, text_wo_casing):
    """Transfer the casing from one text to another, similar (not matching) text.

    1. `difflib`'s `SequenceMatcher` is used to identify the changes needed
       to turn `text_w_casing` into `text_wo_casing`.
    2. For each type of change:

       - inserted sections: the casing is taken from the prior character;
         if there is no prior character, or the prior character is a space,
         the casing is taken from the following character instead. If there
         is no following character either (insertion at the very end, after
         a trailing space), the inserted text is lowercased.
       - deleted sections: no case transfer is required.
       - equal sections: the original text, with its casing, is used as-is.
       - replaced sections: if both sides have the same length,
         :meth:`transfer_casing_for_matching_text` does the transfer;
         otherwise the casing is transferred character-by-character and the
         last casing is carried over to any additional characters.

    Parameters
    ----------
    text_w_casing : str
        Text with varied casing
    text_wo_casing : str
        Text that is in lowercase only

    Returns
    -------
    text_wo_casing : str
        If `text_wo_casing` is empty
    c : str
        Text with the content of `text_wo_casing` but the casing of
        `text_w_casing`

    Raises
    ------
    ValueError
        If `text_w_casing` is empty
    """
    if not text_wo_casing:
        return text_wo_casing

    if not text_w_casing:
        raise ValueError("We need 'text_w_casing' to know what "
                         "casing to transfer!")

    _sm = SequenceMatcher(None, text_w_casing.lower(), text_wo_casing)

    # Re-cased pieces are collected here and joined once at the end.
    pieces = []

    # Handle each operation code describing the differences between the two
    # strings according to the per-operation rules in the docstring.
    for tag, i1, i2, j1, j2 in _sm.get_opcodes():
        if tag == "insert":
            inserted = text_wo_casing[j1:j2]
            if i1 == 0 or text_w_casing[i1 - 1] == " ":
                # Start of the text or of a word: take the casing from the
                # following character. An insertion at the very end has no
                # following character, so guard the index instead of raising
                # IndexError.
                use_upper = (i1 < len(text_w_casing)
                             and text_w_casing[i1].isupper())
            else:
                # Otherwise take the casing from the prior character.
                use_upper = text_w_casing[i1 - 1].isupper()
            pieces.append(inserted.upper() if use_upper else inserted.lower())

        elif tag == "equal":
            # Both sides are equal apart from casing, so the original text
            # can be copied over directly.
            pieces.append(text_w_casing[i1:i2])

        elif tag == "replace":
            _w_casing = text_w_casing[i1:i2]
            _wo_casing = text_wo_casing[j1:j2]

            if len(_w_casing) == len(_wo_casing):
                # Same length: the transfer is a straight positional mapping.
                pieces.append(transfer_casing_for_matching_text(
                    text_w_casing=_w_casing, text_wo_casing=_wo_casing))
            else:
                # Different lengths: go character-by-character and keep using
                # the last seen casing once the cased side runs out.
                carry_upper = False
                for w, wo in zip_longest(_w_casing, _wo_casing):
                    if wo is None:
                        # More cased characters than target characters:
                        # nothing left to emit.
                        continue
                    if w is not None:
                        carry_upper = w.isupper()
                    pieces.append(wo.upper() if carry_upper else wo.lower())

        # tag == "delete": deleted characters contribute nothing.

    return "".join(pieces)
def get_similarity(a, b):
    """Return the similarity of ``a`` and ``b`` as a float in [0, 1]."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
def compare_values(a, b):
    """Return the similarity ratio of two strings, ignoring case and
    leading/trailing whitespace.
    """
    left = a.lower().strip()
    right = b.lower().strip()
    return SequenceMatcher(None, left, right).ratio()
def similar(a, b):
    """Return a decimal representing the similarity between the two strings."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
def similar(a, b):
    """Return the case-insensitive similarity ratio of two strings."""
    lowered_a = a.lower()
    lowered_b = b.lower()
    return SequenceMatcher(None, lowered_a, lowered_b).ratio()
def _check_str_similarity(str_1, str_2): """Checks the similarity of two strings""" return SequenceMatcher(None, str_1, str_2).ratio()
def areSimilar(a, b, threshold=0.8):
    """Return True when ``a`` and ``b`` are more similar than ``threshold``.

    Parameters
    ----------
    a, b : sequences of hashable items (typically str)
        The two values to compare.
    threshold : float, optional
        Similarity cut-off in [0, 1]. The comparison is strict: a ratio
        exactly equal to ``threshold`` is NOT considered similar.
        Defaults to 0.8, the previously hard-coded value.

    Returns
    -------
    bool
    """
    return SequenceMatcher(None, a, b).ratio() > threshold
def typing_game():
    """Play one round of the typing game, then return to the main menu.

    The player is asked to confirm, shown a sentence from
    ``normal_sentences()`` and timed while typing it. A perfect copy is
    recorded with ``add_score``; anything else only reports the accuracy.
    Every path ends by calling ``main_menu()``.
    """
    game_start = str(input("\nStart Game? (Y/N): ")).lower()

    # Anything other than "y" goes straight back to the menu.
    if game_start != 'y':
        if game_start in ['N', 'n']:
            print("\nReturning to the Main Menu\n")
            t.sleep(1)
        else:
            print("Returning to the Main Menu")
        main_menu()
        return

    # Countdown before the sentence is shown.
    for message, pause in (("\nGet ready...\n", 1),
                           ("Get Set...\n", 1),
                           ("Go!\n", 0.5)):
        print(message)
        t.sleep(pause)

    random_sentence = normal_sentences()
    number_of_words = len(random_sentence.split())

    time_start = dt.datetime.now()
    player_input = input(("Your sentence is: \n" + '"' + random_sentence + '"' '\n'))

    perfect = player_input == random_sentence
    if not perfect:
        # Character lists are what the accuracy comparison works on.
        player_input_arr = list(player_input)
        random_sentence_arr = list(random_sentence)

    time_end = dt.datetime.now()
    print("Complete!\n")
    t.sleep(0.5)

    elapsed = time_end - time_start
    time_seconds = round(elapsed.total_seconds(), 2)
    print("Your total time was", time_seconds,'seconds!\n')
    wpm = float(round(((60 / time_seconds) * number_of_words), 2))

    if perfect:
        print("You have a typing speed of", wpm, "WPM\n")
        accuracy = 100
        print("You have an accuracy of", accuracy, '%\n')
        name = input("What is your name?: ")
        print("Added to high scores list!")
        t.sleep(1)
        add_score(name, wpm)
    else:
        matcher = SequenceMatcher(None, player_input_arr, random_sentence_arr)
        similarity = round((matcher.ratio() * 100), 2)
        print("You typed the sentence with ", similarity, " % accuracy!")
        print("\nYou have a typing speed of", wpm, "WPM\n")
        print("\nGet 100% accuracy to be added to the high score list!")

    main_menu()
def sort(self, strings):
    """Return ``strings`` ordered from most to least similar to
    ``self._base_str``.

    A new list is returned; ``strings`` itself is not modified.
    """
    def similarity_to_base(candidate):
        return SequenceMatcher(a=self._base_str, b=candidate).ratio()

    return sorted(strings, key=similarity_to_base, reverse=True)
def _extract_backslash_tokens(text):
    """Split *text* into its plain part and its backslash-introduced tokens.

    A token starts at a backslash and runs up to (not including) the next
    space or newline. Token characters are removed from the plain text and
    a "1" placeholder is left in front of the whitespace that ended the
    token.

    Returns a ``(plain_text, tokens)`` tuple of strings.
    """
    plain = []
    tokens = []
    in_token = False
    for ch in text:
        if ch == '\\':
            tokens.append(ch)
            in_token = True
        elif in_token and ch in (" ", "\n"):
            in_token = False
            plain.append("1")
            plain.append(ch)
        elif in_token:
            tokens.append(ch)
        else:
            plain.append(ch)
    return "".join(plain), "".join(tokens)


def file_compare(outfile, expected, resultsfile, results):  #file compare function
    """Print (and optionally log) how closely ``outfile`` matches ``expected``.

    Both files are read in full. Texts of 84 or more characters have their
    first 70 and last 14 characters dropped before comparison
    (NOTE(review): presumably a fixed header/trailer — confirm). Backslash
    tokens are compared separately from the remaining plain text and both
    contribute to the final percentage.

    Side effects: prints the accuracy line, changes the working directory
    to the parent directory, and, when ``results == True``, appends the same
    line to ``results.txt`` in that (new) working directory.

    Parameters
    ----------
    outfile : path of the produced output file
    expected : path of the reference file
    resultsfile : ignored on input; kept for interface compatibility
    results : when equal to True, the accuracy line is appended to results.txt

    Raises
    ------
    ZeroDivisionError
        If the computed original length is zero (e.g. both texts are empty).
    """
    # Context managers make sure both handles are closed.
    with open(expected) as handle:
        comp = handle.read()
    with open(outfile) as handle:
        output = handle.read()

    # Each text is trimmed independently once it is long enough.
    if len(output) >= 84:
        output = output[70:len(output) - 14]
    if len(comp) >= 84:
        comp = comp[70:len(comp) - 14]

    # Lengths before the backslash tokens are stripped out.
    complength = len(comp)
    outlength = len(output)

    output, thing = _extract_backslash_tokens(output)
    comp, thing2 = _extract_backslash_tokens(comp)

    splitthing = thing.split("\\")
    splitthing2 = thing2.split("\\")

    # Count the positions at which both token lists fully agree.
    i = sum(
        1
        for token, token2 in zip(splitthing, splitthing2)
        if SequenceMatcher(None, token, token2).ratio() == 1
    )

    if len(output) > len(comp):
        # The original referenced an undefined name (thing1) here.
        originallen = outlength - len(thing) + (len(splitthing) - 1)
        biggestlength = len(output)
    else:
        originallen = complength - len(thing2) + (len(splitthing2) - 1)
        biggestlength = len(comp)
    # split() always yields one leading piece before the first backslash,
    # hence the "- 1".
    symbolratio = (i - 1) / originallen

    per = (SequenceMatcher(None, output, comp).ratio() * (biggestlength / originallen) + symbolratio) * 100
    message = 'The output was ' + '{0:.2f}'.format(per) + '% accurate\n'
    print(message)

    os.chdir('../')
    if results == True:  # noqa: E712 - keep the original equality semantics
        with open(os.getcwd() + '/results.txt', 'a') as resultsfile:
            resultsfile.write(message)
    return
def are_similar(str1, str2):
    """Return True when the similarity ratio of the two strings reaches the
    module-level ``THRESHOLD`` (inclusive).
    """
    score = SequenceMatcher(None, str1, str2).ratio()
    return score >= THRESHOLD
import pandas as pd
from difflib import SequenceMatcher

# (label, column) pairs, in the order they are printed for each match.
STAT_FIELDS = (
    ('Name: ', 'Name'),
    ('Total number of matches played in Test', 'TestM'),
    ('Total runs in Test', 'TestRuns'),
    ('Bat avg in Test', 'TestBat Avg'),
    ('Wickets in Test', 'TestWkts'),
    ('Bowl avg in Test', 'TestBowl Avg'),
    ('Total number of matches played in ODI', 'ODIM'),
    ('Total runs in ODI', 'ODIRuns'),
    ('Bat avg in ODI', 'ODIBat Avg'),
    ('Wickets in ODI', 'ODIWkts'),
    ('Bowl avg in ODI', 'ODIBowl Avg'),
    ('Total number of matches played in T20', 'T20M'),
    ('Total runs in T20', 'T20Runs'),
    ('Bat avg in T20', 'T20Bat Avg'),
    ('Wickets in T20', 'T20Wkts'),
    ('Bowl avg in T20', 'T20Bowl Avg'),
)

df = pd.read_csv('players.csv')
z = input('Enter name of player: ')

# Print the stats of every player whose name is at least 50% similar to
# the query; '*' in stored names is treated as a space for the comparison.
for x in range(len(df['Name'])):
    candidate = df['Name'][x].replace('*', ' ')
    ratio = SequenceMatcher(None, candidate, z).ratio()
    if ratio >= 0.5:
        for label, column in STAT_FIELDS:
            print(label, df[column][x])
def fields_match(self, bibcode, s1, s2, field):
    """Compare one field of a record between the old and the new database.

    Parameters
    ----------
    bibcode : identifier of the record, used only in log messages
    s1 : mapping holding the record from the old database
    s2 : mapping holding the record from the new database
    field : name of the field to compare

    Returns
    -------
    str
        When the field is missing: 'required new field not in bibcode',
        'field not in bibcode', 'field not in s1' or 'field not in s2'.
    bool
        Otherwise True if the values are considered matching under the
        field-specific rule, else False.

    Neither ``s1`` nor ``s2`` (nor the values they hold) is modified.
    """
    in_old = field in s1
    in_new = field in s2

    if not in_old and not in_new:
        if field in self.new_fields:
            self.logger.warning('Bibcode {}: required new field {} not present'.format(bibcode, field))
            return 'required new field not in bibcode'
        self.logger.info('Bibcode {}: field {} not present in either database'.format(bibcode, field))
        return 'field not in bibcode'
    if not in_old:
        self.logger.info('Bibcode {}: field {} not present in old database'.format(bibcode, field))
        return 'field not in s1'
    if not in_new:
        self.logger.info('Bibcode {}: field {} not present in new database'.format(bibcode, field))
        return 'field not in s2'

    f1 = s1[field]
    f2 = s2[field]

    # for citations, sort and compare the lists
    if field == 'citation':
        if sorted(f1) != sorted(f2):
            self.logger.warning('Bibcode {}: different numbers of citations present in each database'.format(bibcode))
            return False
        return True

    # allow citation_count to be different by up to 3
    if field == 'citation_count':
        if abs(f1 - f2) > 3:
            self.logger.warning(
                'Bibcode {}: citation_count field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
            return False
        return True

    # allow cite_read_boost to differ by up to 10%, unless one field is 0 and the other is non-zero
    if field == 'cite_read_boost':
        if (f1 == 0.) and (f2 == 0.):
            return True
        if (f1 == 0.) or (f2 == 0.):
            # exactly one side is zero at this point
            self.logger.warning(
                'Bibcode {}: cite_read_boost field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
            return False
        if (abs(f1-f2)/f1) > 0.1:
            self.logger.warning(
                'Bibcode {}: cite_read_boost field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
            return False
        return True

    # CDS has changed to SIMBAD in new pipeline; check for this. Then check the rest of the sorted list
    if field == 'data':
        # Work on copies so the caller's records are left untouched.
        old_data = list(f1)
        new_data = list(f2)
        if ('CDS' in old_data) and ('SIMBAD' in new_data):
            old_data.remove('CDS')
            new_data.remove('SIMBAD')
        if sorted(old_data) != sorted(new_data):
            self.logger.warning(
                'Bibcode {}: data field is different between databases. Old: {} New: {}'.format(bibcode, old_data, new_data))
            return False
        return True

    # doctype intechreport has been changed to techreport
    if (field == 'doctype') and (f1 == 'intechreport') and (f2 == 'techreport'):
        return True

    # for identifier, sort first before comparing, since the order has changed
    if field == 'identifier':
        if sorted(f1) != sorted(f2):
            self.logger.warning('Bibcode {}: identifier field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
            return False
        return True

    # for references, only check that the total number is the same (otherwise sorting
    # differences can confuse it)
    if field == 'reference':
        if len(f1) != len(f2):
            self.logger.warning(
                'Bibcode {}: different numbers of references present in each database'.format(bibcode))
            return False
        return True

    if f1 != f2:
        # check how similar strings are
        if isinstance(f1, str) or (sys.version_info < (3,) and isinstance(f1, unicode)):  # noqa: F821
            ratio = SequenceMatcher(None, f1, f2).ratio()
            if ratio < 0.8:
                # the body text is not included in the log message
                if field == 'body':
                    self.logger.warning(
                        'Bibcode %s: unicode field %s is different between databases.', bibcode, field,)
                else:
                    self.logger.warning('Bibcode %s: unicode field %s is different between databases. Old: %r New: %r',
                                        bibcode, field, f1, f2)
                return False
            # similar enough: note the difference but still report a match
            if field == 'body':
                self.logger.info(
                    'Bibcode %s: unicode field %s is slightly different between databases.', bibcode, field)
            else:
                self.logger.info('Bibcode %s: unicode field %s is slightly different between databases. Old: %r New: %r',
                                 bibcode, field, f1, f2)
        else:
            self.logger.warning('Bibcode {}: field {} is different between databases. Old: {} New: {}'.format(bibcode, field, f1, f2))
            return False

    return True
def roundall(numbers): return map(int, map(round, numbers)) # # Fuzzy string match. # SequenceMatcher's first argument is a function that returns true for # characters considered to be "junk". For instance, if blanks are junk, # lambda x: x == " " # To consider nothing as junk, pass None. # from difflib import SequenceMatcher best_ratio = -1 best_match = None for b in string_list: r = SequenceMatcher(None, matchname, b).ratio() if r > best_ratio: best_match = b best_ratio = r # # sorting + lambda examples. # # The cmp function is obsolete. # Instead, use a key function, # which is called on each element of the list prior to sorting. # https://wiki.python.org/moin/HowTo/Sorting # def sort_by_last_letter(words): # sorted() returns a new sorted list. print sorted(words, key = lambda a: a[-1])
def similar(a, b):
    """Return the similarity ratio of ``a`` and ``b`` (1.0 means identical)."""
    comparison = SequenceMatcher(isjunk=None, a=a, b=b)
    return comparison.ratio()
def similar(a, b):
    """Return the similarity ratio of two strings, ignoring case and
    space characters.
    """
    squashed_a = a.replace(" ", "").lower()
    squashed_b = b.replace(" ", "").lower()
    return SequenceMatcher(None, squashed_a, squashed_b).ratio()
# Usage: <script> FILE_NAME MISSPELLED_WORD
# Prints the CSV file's rows, then the (up to) five first-column values that
# are most similar to the misspelled word.
file_name = sys.argv[1]
mispelled_word = sys.argv[2]
#file_name="file2.csv"
#mispelled_word="you"

print("Contents of file : ", file_name)
a = []
# newline='' is what the csv module asks for so that newlines embedded in
# quoted fields are parsed correctly.
with open(file_name, 'r', newline='') as file:
    for row in csv.reader(file):
        print(", ".join(map(str, row)))
        a.append(row)

# Score the first column of every row against the misspelled word.
# Blank lines come back from csv.reader as empty lists; they have no first
# column and are skipped rather than raising IndexError.
suggestions = [
    [row[0], SequenceMatcher(None, mispelled_word, row[0]).ratio()]
    for row in a
    if row
]
# Best match first; the sort is stable, so ties keep their file order.
suggestions.sort(key=lambda x: x[1], reverse=True)

rangeTill = min(5, len(suggestions))
topsuggestions = [word for word, _ in suggestions[:rangeTill]]

print("\nTop Suggestions :")
print(", ".join(map(str, topsuggestions)))
def _longest_match_size(str1, str2): sq = SequenceMatcher(lambda x: x == " ", str1, str2) match = sq.find_longest_match(0, len(str1), 0, len(str2)) return match.size