def detect_period(filename):
    """Extract the term ("Kỳ hạn") stated in a file, e.g. ``'5 Năm'``.

    The file content is obtained through ``read_file`` and scanned line by
    line. A line counts as a term line when its first six characters are
    closer than the threshold to ``'Kỳ hạn'`` according to ``distance``.
    For such a line the unit is ``' Năm'`` (years) when the last three
    characters of the line, ignoring surrounding spaces and dashes, are close
    to ``'Năm'``; otherwise the unit is ``' Tháng'`` (months). The label and
    the unit suffix are removed from the line, the rest goes through
    ``replace_char`` and ``digit_detect``, and the first detected item is
    concatenated with the unit.

    NOTE(review): the 0.4 threshold suggests ``distance`` returns a
    normalized value here -- confirm against its definition.

    Args:
        filename: Passed unchanged to ``read_file``.

    Returns:
        The term string of the last qualifying line, or ``None`` when no line
        qualifies.
    """
    threshold = 0.4
    found = None
    for line in read_file(filename).split('\n'):
        label = line[:6]
        if distance(label, 'Kỳ hạn') < threshold:
            # The unit check ignores trailing spaces/dashes, but the suffix
            # that gets removed is sliced from the raw line.
            if distance(line.strip(' -')[-3:], 'Năm') < threshold:
                suffix, unit = line[-3:], ' Năm'
            else:
                suffix, unit = line[-5:], ' Tháng'
            remainder = line.strip().replace(label, '').replace(suffix, '')
            # No early exit: a later qualifying line overrides an earlier one.
            found = digit_detect(replace_char(remainder))[0] + unit
    return found
def match_input_weak(input_wrong, correct):
    """Return the single closest command to ``input_wrong``, if unambiguous.

    Candidates are compared case-insensitively using ``distance``. Radii
    1, 2, 3 and 4 are tried in that order; the search stops at the first
    radius for which at least one candidate lies at exactly that distance.

    NOTE(review): a candidate at distance 0 is never selected -- presumably
    exact matches are handled by the caller before this fallback; confirm.

    Args:
        input_wrong: The (possibly mistyped) user input.
        correct: Iterable of valid command strings.

    Returns:
        The matching command when exactly one candidate lies at the smallest
        populated radius, otherwise ``None`` (no candidate within distance 4,
        or several candidates tied at that radius).
    """
    needle = input_wrong.lower()
    # Score every candidate once instead of recomputing the distance for
    # each radius.
    scored = [
        (distance(needle, candidate.lower()), candidate)
        for candidate in correct
    ]
    for radius in (1, 2, 3, 4):
        valid = [candidate for dist, candidate in scored if dist == radius]
        if valid:
            # A tie at the closest radius is ambiguous: report no match.
            return valid[0] if len(valid) == 1 else None
    return None
def notifyUser(itemOne, itemTwo):
    """Tell whether two items look like the same person with differing given names.

    Both items must expose ``givName``, ``firstInit`` and ``famName``.

    Returns:
        ``True`` when the family names are equal, the first initials are
        equal, and ``distance`` between the given names is at most 3;
        ``False`` otherwise.
    """
    given_one, given_two = itemOne.givName, itemTwo.givName
    same_initial = itemOne.firstInit == itemTwo.firstInit
    same_surname = itemOne.famName == itemTwo.famName
    # The distance is computed unconditionally, as before.
    given_gap = distance(given_one, given_two)
    return same_surname and same_initial and given_gap <= 3
def spellCheck(itemOne, itemTwo, totalcount):
    """Decide whether ``itemOne`` looks like a misspelling of ``itemTwo``.

    Both items must expose ``famName``, ``firstName``, ``midName``,
    ``primPub`` and ``secPub``; ``totalcount`` is indexed by family name.
    NOTE(review): ``primPub + secPub`` is presumably a publication count and
    ``totalcount`` a per-surname occurrence count -- confirm with the caller.

    Two rules are applied:

    * Surname typo: ``totalcount`` for itemOne's surname is 1, itemOne's
      summed pub value is 1, the surnames differ, itemTwo's summed pub value
      is at least 2, the surname ``distance`` is at most 2, itemTwo's first
      name starts with itemOne's, and the middle names are either both empty
      or both non-empty with itemTwo's starting with itemOne's.
    * First-name typo: the surnames are equal, itemOne's summed pub value is
      1, the first-name ``distance`` is at most 1, both first names are
      longer than one character, and the middle names are equal.

    Returns:
        ``True`` when one of the rules holds, ``False`` otherwise.

    Raises:
        KeyError: If itemOne's family name is missing from ``totalcount``
            (when ``totalcount`` is a plain dict).
    """
    surname_a, surname_b = itemOne.famName, itemTwo.famName
    first_a, first_b = itemOne.firstName, itemTwo.firstName
    pubs_a = itemOne.primPub + itemOne.secPub
    pubs_b = itemTwo.primPub + itemTwo.secPub
    middle_a, middle_b = itemOne.midName, itemTwo.midName

    if totalcount[surname_a] == 1 and pubs_a == 1 and surname_a != surname_b:
        # Surname-typo rule; the operand order keeps the distance call
        # behind the cheap pub check.
        return (
            pubs_b >= 2
            and distance(surname_a, surname_b) <= 2
            and first_b.startswith(first_a)
            and (
                (middle_a != '' and middle_b != ''
                 and middle_b.startswith(middle_a))
                or (middle_a == '' and middle_b == '')
            )
        )

    if surname_a == surname_b and pubs_a == 1:
        # First-name-typo rule.
        return (
            distance(first_a, first_b) <= 1
            and len(first_a) > 1
            and len(first_b) > 1
            and middle_a == middle_b
        )

    return False
def compute_similarity(a, b):
    """
    Compute the similarity between two given ActiveSite instances.

    Distance is calculated using the Damerau-Levenshtein distance. This
    algorithm quantifies the 'distance' between two strings by counting the
    number of operations that are needed to transform one string into
    another. In this algorithm the operations allowed are inserting a letter,
    deleting a letter, substituting a letter or transposing a letter. This is
    an improvement on the Levenshtein distance algorithm that preceded it, as
    transpositions are common errors in written language and may also have
    viable biological relevance.

    Implementation from-
    https://github.com/gfairchild/pyxDamerauLevenshtein
    Algorithmic understanding from-
    https://www.mathworks.com/matlabcentral/cody/problems/2309-calculate-the-damerau-levenshtein-distance-between-two-strings

    Input: two ActiveSite instances
    Output: the similarity between them (a floating point number)

    NOTE(review): the value returned is exactly ``distance(a, b)``, i.e. a
    distance rather than a similarity score -- presumably larger means less
    similar; confirm that callers treat it that way. Whether ``distance``
    accepts ActiveSite instances directly (rather than strings) should also
    be verified.
    """
    return distance(a, b)
def detect_currency(filename):
    """Find the currency mentioned in a file.

    The file content is obtained through ``read_file`` and split on
    whitespace. Two passes are made over the resulting tokens:

    1. Exact: a token that is a member of ``currencys``. When several tokens
       qualify, the last one in the file wins.
    2. Fuzzy (only when pass 1 found nothing): a currency whose ``distance``
       to some token is below 0.5. When several currencies qualify, the last
       one in ``currencys`` iteration order wins.

    NOTE(review): the 0.5 threshold suggests ``distance`` returns a
    normalized value here -- confirm against its definition.

    Args:
        filename: Passed unchanged to ``read_file``.

    Returns:
        The matched token (pass 1) or currency (pass 2), or ``None`` when
        neither pass finds a match.
    """
    tokens = read_file(filename).split()

    # Pass 1: exact membership. No early exit, so the last hit is kept.
    exact = None
    for token in tokens:
        if token in currencys:
            exact = token
    if exact is not None:
        return exact

    # Pass 2: fuzzy comparison of every currency against every token.
    threshold = 0.5
    fuzzy = None
    for currency in currencys:
        for token in tokens:
            if distance(currency, token) < threshold:
                fuzzy = currency
    return fuzzy
def terms_match(requested_term: str, source_term: str) -> bool:
    """Report whether two terms are considered the same.

    With a non-positive ``MAX_LEVENSHTEIN_DISTANCE`` only exact equality
    counts. Otherwise the terms match when ``distance`` between them is
    strictly below ``MAX_LEVENSHTEIN_DISTANCE``.

    NOTE(review): because the comparison is strict, a setting of 1 still
    accepts only distance 0 -- confirm that ``<`` rather than ``<=`` is
    intended for a constant named "MAX".
    """
    if MAX_LEVENSHTEIN_DISTANCE <= 0:
        # Fuzzy matching disabled.
        return requested_term == source_term
    return distance(requested_term, source_term) < MAX_LEVENSHTEIN_DISTANCE
def match_input(input_wrong, correct):
    """Guess which command from ``correct`` a mistyped input was meant to be.

    Heuristics are tried in the order below. Each one runs only while no
    candidate has been found yet, so the first heuristic that produces
    candidates decides the candidate set:

    1. a command is a substring of the input;
    2. ``distance(input, command) == 1``;
    3. ``distance(input, command) == 2`` or
       ``norm_distance(input, command) <= 0.3``;
    4. a permutation of a command (from ``perm_dict``) is a substring of the
       input;
    5. a command starts with the whole input;
    6. a command starts with a prefix of the input, trying prefixes of
       length 1, 2, ... until one yields candidates;
    7. ``distance(input, command) == 3``;
    8. a permutation of a command is at ``distance`` 1 from the input.

    If several candidates remain, only those sharing the input's first
    character are kept.

    Args:
        input_wrong: The user input to interpret.
        correct: Re-iterable collection (e.g. a list) of valid command
            strings.

    Returns:
        ``(command, supplement)`` when exactly one candidate remains, else
        ``None``. ``supplement`` is the text following the command when
        heuristic 1 found exactly one command and the input starts with it
        (an accidental concatenation such as ``"helpx"``); otherwise it is
        ``''``.
    """
    # 1. A valid command is contained in the input.
    valid = [command for command in correct if command in input_wrong]

    # If exactly one command was found and the input begins with it, the
    # user probably concatenated something onto it; keep the remainder.
    supplement = ''
    if len(valid) == 1 and input_wrong.startswith(valid[0]):
        supplement = input_wrong[len(valid[0]):]

    # 2. D-L distance of exactly 1.
    if not valid:
        valid = [
            command for command in correct
            if distance(input_wrong, command) == 1
        ]

    # 3. D-L distance of exactly 2, or normalized D-L distance <= 0.3.
    if not valid:
        valid = [
            command for command in correct
            if distance(input_wrong, command) == 2
            or norm_distance(input_wrong, command) <= 0.3
        ]

    # 4. A permutation of a valid command is contained in the input.
    # ``perm`` is only built once stages 1-3 have failed; stage 8 can only
    # run after this stage has, so it always sees the populated mapping.
    perm = {}
    if not valid:
        perm = perm_dict(correct)
        for command in correct:
            if command not in valid and any(
                    p in input_wrong for p in perm[command]):
                valid.append(command)

    # 5. A valid command starts with the input.
    if not valid:
        valid = [
            command for command in correct
            if command.startswith(input_wrong)
        ]

    # 6. A valid command starts with a prefix of the input, shortest prefix
    # first. Probably should be a last-resort measure.
    prefix_len = 1
    while not valid and prefix_len <= len(input_wrong):
        prefix = input_wrong[:prefix_len]
        valid = [command for command in correct if command.startswith(prefix)]
        prefix_len += 1

    # 7. D-L distance of exactly 3.
    if not valid:
        valid = [
            command for command in correct
            if distance(input_wrong, command) == 3
        ]

    # 8. A permutation of a valid command has D-L distance 1 to the input.
    if not valid:
        for command in correct:
            if command not in valid and any(
                    distance(p, input_wrong) == 1 for p in perm[command]):
                valid.append(command)

    # Several candidates: keep only those starting with the same character
    # as the input. Slicing instead of indexing keeps an empty input or an
    # empty command string from raising IndexError.
    if len(valid) > 1:
        first_char = input_wrong[:1]
        valid = [command for command in valid if command[:1] == first_char]

    if len(valid) == 1:
        return valid[0], supplement
    return None
def single_linkage(cluster1, cluster2):
    """Return the ``distance`` between the ``avg_string`` of two clusters.

    Each cluster is reduced to one representative with ``avg_string`` and
    the two representatives are then compared with ``distance``.

    NOTE(review): despite the name, this compares one averaged
    representative per cluster rather than taking a minimum over pairwise
    member distances -- confirm the naming is intentional.
    """
    return distance(avg_string(cluster1), avg_string(cluster2))