Example #1
0
def find_definition(tree, query):
    """Return the ``def-para`` element whose term best matches *query*.

    Candidates are the ``def-term`` elements under ``def-para/para/text``
    whose string value contains *query*.  Among them, the one whose ``.text``
    has the smallest ``levenshtein`` distance to *query* is chosen (the first
    one in document order on ties), and its nearest ``def-para`` ancestor is
    returned.

    Args:
        tree: element or tree exposing ``xpath()``; the result nodes must
            provide ``iterancestors()`` (lxml-style API).
        query: text to look for inside the definition terms.

    Raises:
        CustomException: if no candidate matches or any step of the lookup
            fails.
    """
    try:
        # Hand the query over as an XPath variable instead of splicing it
        # into the expression, so quotes inside the query cannot break it.
        nodes = tree.xpath(
            ".//def-para/para/text/def-term[contains(., $query)]",
            query=query)
        # min() keeps the first of several equally close candidates, the same
        # element a stable sort by distance would have put first.
        closest = min(nodes, key=lambda node: levenshtein(query, node.text))
        # The next() builtin works on both Python 2.6+ and Python 3.
        return next(closest.iterancestors(tag='def-para'))
    except Exception:
        # Every failure is reported through the single exception type that
        # callers of this function handle.
        raise CustomException("Path for definition not found")
Example #2
0
def getCodeNumberForProvince(province):
    """Return the ``code_number`` of the known province closest to *province*.

    Every entry returned by ``getProvinces()`` is scored by the
    case-insensitive ``levenshtein`` distance between *province* and the
    entry's ``pretty_name``; the code of the lowest-scoring entry is returned
    (the first one on ties).
    """

    def distance_to(candidate):
        # Case-insensitive distance between the requested name and candidate
        return levenshtein(province.lower(), candidate.lower())

    # Collect (code, distance) pairs for all known provinces
    scored = []
    for entry in getProvinces():
        code = entry['code_number']
        scored.append((code, distance_to(entry['pretty_name'])))

    # The pair with the smallest distance identifies the best match
    best_code, _ = min(scored, key=lambda pair: pair[1])
    return best_code
Example #3
0
def name_prompt(prompt, people, name):
    """Ask the user to pick a person's name, or to type one in.

    The five entries of *people* whose ``full_name`` is closest to *name*
    (case-insensitive ``levenshtein`` distance) are listed as numbered
    choices.  The prompt repeats until the user gives a usable answer.

    Args:
        prompt: text printed before the list of choices.
        people: iterable of objects exposing a ``full_name`` attribute.
        name: the name to rank the candidates against.

    Returns:
        The ``full_name`` of the selected choice when the user enters a
        valid number, otherwise the raw text the user typed (write-in).
    """
    # The ranking does not depend on the user's answer, so compute it once.
    # Both sides are lower-cased so the comparison is really case-insensitive.
    target = name.lower()
    choices = sorted(
        people,
        key=lambda candidate: levenshtein(candidate.full_name.lower(), target),
    )[:5]

    while True:
        print(prompt)
        for number, choice in enumerate(choices, start=1):
            print("{}) {}".format(number, choice.full_name))
        user_choice = input('Choose one or write in: ')

        try:
            index = int(user_choice) - 1
        except ValueError:
            # Not a number: treat the input as a written-in name.
            return user_choice  # TODO: move looping until valid name into here?

        # Explicit range check: a plain choices[index] would accept 0 and
        # negative numbers through Python's negative indexing.
        if 0 <= index < len(choices):
            return choices[index].full_name
        print("Invalid selection")
Example #4
0
 def get_lv_score(self, tweet_text):
     """Score *tweet_text* against the word clusters and return a signed score.

     The text is stripped and split on single spaces.  For each cluster in
     ``self.clusters`` (name -> iterable of words), every tweet word
     contributes the largest ``levenshtein(word, cluster_word)`` value found
     in that cluster (at least 0); the contributions are averaged over the
     number of tweet words.  The cluster with the highest average wins, the
     first one on ties.

     Returns:
         The winning average, negated when ``self.sentiments`` maps the
         winning cluster to ``self.sents['neg']``.  Returns 0 when no cluster
         scores above zero (including when there are no clusters).
     """
     words = tweet_text.strip().split(' ')
     best_score = 0
     # None means "no cluster has scored above zero yet"; a sentinel keeps an
     # empty-string cluster name distinguishable from "nothing found".
     best_name = None
     for name, cluster in self.clusters.items():
         cluster_max = 0
         for word in words:
             local_max = 0
             for sent_word in cluster:
                 local_max = max(local_max, levenshtein(word, sent_word))
             cluster_max += local_max
         cluster_max /= len(words)
         if cluster_max > best_score:
             best_score = cluster_max
             best_name = name
     if best_name is None:
         # Nothing matched: skip the sentiment lookup, which would otherwise
         # be attempted with a placeholder name and fail with KeyError.
         return best_score
     if self.sentiments[best_name] == self.sents['neg']:
         best_score *= -1
     return best_score
    # Resolve the code for `province` and store it under "code_number".
    # NOTE(review): `province` and `codes` are bound outside this fragment;
    # presumably this runs once per entry of `provinces` -- confirm.
    matching_code = None

    # Go over all the recorded codes
    for current_name, current_code in codes.items():

        # If there's a direct match (case-insensitive substring test: the
        # province's pretty name occurs inside the recorded name)
        if province["pretty_name"].lower() in current_name.lower():
            matching_code = current_code
            break

    # If no matching code has been found, try to find the most similar province name
    # using levenshtein distance
    # NOTE(review): `not matching_code` is also true for a matched code that
    # is falsy (e.g. 0 or ""), which would trigger the fallback as well.

    if not matching_code:
        # Initial upper bound for the search. It reuses `current_name` as left
        # over from the loop above (the last name iterated).
        # NOTE(review): if `codes` is empty that name is never bound by this
        # fragment, and a candidate whose distance is not below this bound is
        # never selected -- confirm this is intended.
        min_val = len(current_name) * 2

        for current_name, current_code in codes.items():
            cur_val = levenshtein(province["pretty_name"].lower(),
                                  current_name.lower())

            # Strict comparison: on equal distances the earlier entry is kept
            if min_val > cur_val:
                min_val = cur_val
                matching_code = current_code

    # May still be None when neither step produced a match
    province["code_number"] = matching_code

# Write the results to the file
with io.open(output_file, "w", encoding='utf8') as the_file:
    serialized = json.dumps(provinces, indent=2, ensure_ascii=False)
    # On Python 2, json.dumps(..., ensure_ascii=False) can return a byte
    # string, which a text-mode io file refuses to write. Decoding here is a
    # no-op on Python 3, where the result is always text.
    if isinstance(serialized, bytes):
        serialized = serialized.decode('utf8')
    the_file.write(serialized)

# Call form of print: same output on Python 2, valid syntax on Python 3
print("{} provinces written to {}".format(len(provinces), output_file))
    # Resolve the code for `province` and store it under "code_number".
    # NOTE(review): `province` and `codes` are bound outside this fragment;
    # presumably this runs once per entry of `provinces` -- confirm.
    matching_code = None

    # Go over all the recorded codes
    for current_name, current_code in codes.items():

        # If there's a direct match (case-insensitive substring test: the
        # province's pretty name occurs inside the recorded name)
        if province["pretty_name"].lower() in current_name.lower():
            matching_code = current_code
            break

    # If no matching code has been found, try to find the most similar province name
    # using levenshtein distance
    # NOTE(review): `not matching_code` is also true for a matched code that
    # is falsy (e.g. 0 or ""), which would trigger the fallback as well.

    if not matching_code:
        # Initial upper bound for the search. It reuses `current_name` as left
        # over from the loop above (the last name iterated).
        # NOTE(review): if `codes` is empty that name is never bound by this
        # fragment, and a candidate whose distance is not below this bound is
        # never selected -- confirm this is intended.
        min_val = len(current_name) * 2

        for current_name, current_code in codes.items():
            cur_val = levenshtein(province["pretty_name"].lower(), current_name.lower())

            # Strict comparison: on equal distances the earlier entry is kept
            if min_val > cur_val:
                min_val = cur_val
                matching_code = current_code

    # May still be None when neither step produced a match
    province["code_number"] = matching_code

# Write the results to the file
with io.open(output_file, "w", encoding='utf8') as the_file:
    serialized = json.dumps(provinces, indent=2, ensure_ascii=False)
    # On Python 2, json.dumps(..., ensure_ascii=False) can return a byte
    # string, which a text-mode io file refuses to write. Decoding here is a
    # no-op on Python 3, where the result is always text.
    if isinstance(serialized, bytes):
        serialized = serialized.decode('utf8')
    the_file.write(serialized)

# Call form of print: same output on Python 2, valid syntax on Python 3
print("{} provinces written to {}".format(len(provinces), output_file))