def detect_period(filename):
    """Find the period ('Kỳ hạn') stated in a text file and return it with its unit.

    The text returned by ``read_file(filename)`` is scanned line by line. A
    line qualifies when its first six characters score below 0.4 against
    'Kỳ hạn' according to ``distance`` (presumably a normalized edit
    distance -- confirm against the imported function). When several lines
    qualify, the last one determines the result.

    Returns:
        The first item produced by ``digit_detect`` for the qualifying line,
        followed by ' Năm' or ' Tháng'; ``None`` when no line qualifies.
    """
    max_score = 0.4
    found = None
    for line in read_file(filename).split('\n'):
        head = line[:6]
        if not distance(head, 'Kỳ hạn') < max_score:
            continue
        # The unit is decided on the line with spaces and dashes trimmed, but
        # the piece removed below is sliced from the untrimmed line.
        if distance(line.strip(' -')[-3:], 'Năm') < max_score:
            tail, unit = line[-3:], ' Năm'
        else:
            tail, unit = line[-5:], ' Tháng'
        # str.replace drops every occurrence of the label and of the tail.
        remainder = line.strip().replace(head, '').replace(tail, '')
        found = digit_detect(replace_char(remainder))[0] + unit
    return found
Beispiel #2
0
def match_input_weak(input_wrong, correct):
    """Return the single entry of ``correct`` closest to ``input_wrong``, if unambiguous.

    Candidates are compared case-insensitively with ``distance``. Radii 1, 2,
    3 and 4 are tried in that order; the search stops at the first radius at
    which at least one candidate lies exactly that far away. A distance of 0
    is never tested.

    Returns:
        The matching entry (in its original casing) when exactly one candidate
        was found at the winning radius; ``None`` when there were none within
        radius 4 or when several tie.
    """
    for radius in (1, 2, 3, 4):
        hits = [
            candidate
            for candidate in correct
            if distance(input_wrong.lower(), candidate.lower()) == radius
        ]
        if hits:
            # Several equally close candidates: refuse to guess.
            return hits[0] if len(hits) == 1 else None
    return None
Beispiel #3
0
def notifyUser(itemOne,itemTwo):
    """Tell whether two items look like the same person with a slightly different given name.

    Both items must expose ``givName``, ``firstInit`` and ``famName``.

    Returns:
        True when the family names are equal, the first initials are equal and
        ``distance`` between the given names is at most 3; False otherwise.
    """
    given_a, initial_a, surname_a = itemOne.givName, itemOne.firstInit, itemOne.famName
    given_b, initial_b, surname_b = itemTwo.givName, itemTwo.firstInit, itemTwo.famName
    gap = distance(given_a, given_b)
    return bool(surname_a == surname_b and initial_a == initial_b and gap <= 3)
Beispiel #4
0
def spellCheck(itemOne,itemTwo,totalcount):
    """Decide whether ``itemOne`` looks like a misspelt variant of ``itemTwo``.

    Both items must expose ``famName``, ``firstName``, ``midName``,
    ``primPub`` and ``secPub``; ``totalcount`` is indexed by family name.

    Two situations yield True:

    * Different family names: ``totalcount`` for the first family name is 1,
      the first item's ``primPub + secPub`` is 1, the second item's is at
      least 2, the family names are at most 2 apart per ``distance``, the
      second first name starts with the first one, and the middle names are
      either both empty or both non-empty with the second starting with the
      first.
    * Same family name: the first item's ``primPub + secPub`` is 1, both
      first names are longer than one character and at most 1 apart per
      ``distance``, and the middle names are equal.

    Returns:
        True in the situations above, False otherwise.
    """
    surname_a, surname_b = itemOne.famName, itemTwo.famName
    first_a, first_b = itemOne.firstName, itemTwo.firstName
    pubs_a = itemOne.primPub + itemOne.secPub
    pubs_b = itemTwo.primPub + itemTwo.secPub
    middle_a, middle_b = itemOne.midName, itemTwo.midName

    if totalcount[surname_a] == 1 and pubs_a == 1 and surname_a != surname_b:
        if not pubs_b >= 2:
            return False
        if not distance(surname_a, surname_b) <= 2:
            return False
        if not first_b.startswith(first_a):
            return False
        if middle_a != '' and middle_b != '' and middle_b.startswith(middle_a):
            return True
        return bool(middle_a == '' and middle_b == '')

    if surname_a == surname_b and pubs_a == 1:
        first_names_close = distance(first_a, first_b) <= 1
        return bool(
            first_names_close
            and len(first_a) > 1
            and len(first_b) > 1
            and middle_a == middle_b
        )

    return False
Beispiel #5
0
def compute_similarity(a, b):
    """
    Compute the similarity between two given ActiveSite instances.

    The returned value is exactly what ``distance(a, b)`` yields; the
    arguments are passed through unchanged.

    The measure is described as the Damerau-Levenshtein distance. This
    algorithm quantifies the 'distance' between two strings by counting the
    number of operations needed to transform one string into the other. The
    operations allowed are inserting a letter, deleting a letter,
    substituting a letter, or transposing two adjacent letters. This is an
    improvement on the Levenshtein distance algorithm that preceded it, as
    transpositions are common errors in written language and may also have
    biological relevance.

    NOTE(review): since a distance is returned, a smaller value presumably
    means the inputs are MORE alike -- confirm that callers treat it so.
    NOTE(review): which ``distance`` is imported, and whether it accepts
    ActiveSite objects directly or yields a float, is not visible here --
    confirm against the import.

    Implementation from: https://github.com/gfairchild/pyxDamerauLevenshtein
    Algorithmic understanding from: https://www.mathworks.com/matlabcentral/cody/problems/2309-calculate-the-damerau-levenshtein-distance-between-two-strings

    Input: two ActiveSite instances
    Output: the similarity between them (a floating point number)
    """
    return distance(a, b)
def detect_currency(filename):
    """Find which entry of ``currencys`` is mentioned in a text file.

    The text returned by ``read_file(filename)`` is split on whitespace.
    Tokens that appear verbatim in ``currencys`` are preferred; if there are
    several, the last such token in the file is returned. Only when there is
    no verbatim hit is every known currency compared with every token via
    ``distance``; pairs scoring below 0.5 count as hits (presumably a
    normalized distance -- confirm against the import), and the last hit in
    ``currencys`` iteration order is returned.

    Returns:
        The detected currency, or ``None`` when nothing matches.
    """
    tokens = read_file(filename).split()

    verbatim = [token for token in tokens if token in currencys]
    if verbatim:
        return verbatim[-1]

    max_score = 0.5
    approximate = [
        known
        for known in currencys
        for token in tokens
        if distance(known, token) < max_score
    ]
    return approximate[-1] if approximate else None
Beispiel #7
0
def terms_match(requested_term: str, source_term: str) -> bool:
    """Tell whether ``source_term`` is an acceptable match for ``requested_term``.

    When ``MAX_LEVENSHTEIN_DISTANCE`` is positive, the terms match if
    ``distance`` between them is strictly below that limit (the limit itself
    is excluded). Otherwise only exact equality counts.
    """
    if not MAX_LEVENSHTEIN_DISTANCE > 0:
        # Fuzzy matching is switched off.
        return requested_term == source_term
    return distance(requested_term, source_term) < MAX_LEVENSHTEIN_DISTANCE
Beispiel #8
0
def match_input(input_wrong, correct):
    """Guess which command from ``correct`` a mistyped ``input_wrong`` was meant to be.

    The heuristics below run in order; as soon as one of them produces at
    least one candidate, the remaining ones are skipped:

    1. a command occurs as a substring of the input;
    2. a command is at ``distance`` 1 from the input;
    3. a command is at ``distance`` 2, or at ``norm_distance`` <= 0.3;
    4. one of a command's permutations (from ``perm_dict``) occurs as a
       substring of the input;
    5. a command starts with the whole input;
    6. a command starts with the input's first character;
    7. a command is at ``distance`` 3 from the input;
    8. one of a command's permutations is at ``distance`` 1 from the input.

    If more than one candidate was collected, those whose first character
    differs from the input's first character are discarded.

    Args:
        input_wrong: The text the user typed.
        correct: The valid commands; iterated several times.

    Returns:
        ``(command, supplement)`` when exactly one candidate remains, else
        ``None``. ``supplement`` is the part of the input following the
        command when step 1 found exactly one command and the input starts
        with it (a likely accidental concatenation); otherwise it is ``''``.
    """
    supplement = ''
    perm = {}

    # 1. A valid command embedded in the input.
    valid = [command for command in correct if command in input_wrong]

    # A single embedded command at the very start suggests the user glued
    # extra text onto it; hand that text back to the caller.
    if len(valid) == 1 and input_wrong.startswith(valid[0]):
        supplement = input_wrong[len(valid[0]):]

    # 2. D-L distance of exactly 1.
    if not valid:
        valid = [
            command for command in correct
            if distance(input_wrong, command) == 1
        ]

    # 3. D-L distance of exactly 2, or normalized D-L distance <= 0.3.
    if not valid:
        valid = [
            command for command in correct
            if distance(input_wrong, command) == 2
            or norm_distance(input_wrong, command) <= 0.3
        ]

    # 4. A permutation of a valid command embedded in the input.
    if not valid:
        perm = perm_dict(correct)
        for command in correct:
            if command not in valid and any(
                p in input_wrong for p in perm[command]
            ):
                valid.append(command)

    # 5. Commands that start with the whole input.
    if not valid:
        valid = [
            command for command in correct
            if command.startswith(input_wrong)
        ]

    # 6. Last-resort prefix match. Only the first character needs checking:
    # a command that does not start with it cannot start with any longer
    # prefix of the input either.
    if not valid and input_wrong:
        head = input_wrong[0]
        valid = [command for command in correct if command.startswith(head)]

    # 7. D-L distance of exactly 3.
    if not valid:
        valid = [
            command for command in correct
            if distance(input_wrong, command) == 3
        ]

    # 8. A permutation at D-L distance 1. ``perm`` is always populated here:
    # the candidate list never shrinks before this point, so it was also
    # empty when step 4 ran.
    if not valid:
        for command in correct:
            if command not in valid and any(
                distance(p, input_wrong) == 1 for p in perm[command]
            ):
                valid.append(command)

    # Several candidates: keep those sharing the input's first character.
    # Slicing instead of indexing keeps empty strings from raising IndexError.
    if len(valid) > 1:
        first_char = input_wrong[:1]
        valid = [command for command in valid if command[:1] == first_char]

    if len(valid) == 1:
        return valid[0], supplement
    return None
Beispiel #9
0
def single_linkage(cluster1, cluster2):
    """Return the ``distance`` between the ``avg_string`` of each cluster.

    Each cluster is first reduced to one string by ``avg_string``; the result
    is the distance between those two strings. Despite the function's name,
    no minimum over pairs of cluster members is taken here.
    """
    return distance(avg_string(cluster1), avg_string(cluster2))