Python levの例、Levenshtein.lev Pythonの例

コード例 #1

0

ファイルを表示

def levenshtein_method(cdr3, cutoff=1):
    """Build an edge list of all pairs in ``cdr3`` within an edit-distance cutoff.

    Args:
        cdr3: Iterable of strings; every unordered pair of its items is compared.
        cutoff: Largest distance (inclusive) for which a pair is kept.

    Returns:
        A set of tab-separated ``"first\\tsecond\\tdistance"`` strings, one per
        pair whose ``lev`` distance is at most ``cutoff``.
    """
    edgelist = set()
    # combinations() snapshots its input itself and yields pairs lazily, so
    # there is no need to copy the input or to materialise every pair up front.
    for first, second in combinations(cdr3, 2):
        d = lev(first, second)
        if d <= cutoff:
            edgelist.add(first + "\t" + second + "\t" + str(d))
    return edgelist

コード例 #2

0

ファイルを表示

def have_same_root(word1, word2):
    """Heuristically decide whether two words share the same root.

    Both words are stemmed with ``stemmer.stemWord``. The stems are considered
    to have the same root when they are equal, when their ``lev`` distance is
    at most 2, or when they share enough letters according to
    ``common_letters`` (more than 6, or 4-6 with at most 2 letters of the
    longer stem left unmatched).

    Args:
        word1: First word.
        word2: Second word.

    Returns:
        True if the words are judged to share a root, False otherwise.
    """
    w1_stem = stemmer.stemWord(word1)
    w2_stem = stemmer.stemWord(word2)
    if w1_stem == w2_stem or lev(w1_stem, w2_stem) <= 2:
        return True
    comm = common_letters(w1_stem, w2_stem)
    if comm < 4:
        return False
    if comm > 6:
        return True
    # Compare against the length of the LONGER stem. The previous
    # len(max(w1_stem, w2_stem)) picked the lexicographically greater stem,
    # which is unrelated to length.
    diff = abs(max(len(w1_stem), len(w2_stem)) - comm)
    # comm is between 4 and 6 here, so "diff > comm" already implies
    # "diff > 2"; a single comparison is sufficient.
    return diff <= 2

コード例 #3

0

ファイルを表示

ファイル: app.py プロジェクト: TiBiBa/hedy

def get_similar_code(processed_code, language, level):
    """Look up the stored code sample closest to ``processed_code``.

    Reads ``coursedata/similar-code-files/<language>/level<level>.csv`` and
    compares ``processed_code`` against the second column of every row using
    ``lev``. The first column of the closest row is returned.

    Args:
        processed_code: Code string to compare against the stored samples.
        language: Language sub-directory name used to build the file path.
        level: Level number used to build the file name.

    Returns:
        The first column of the closest row whose distance is below the
        threshold of 75, or None when no row is close enough or the file
        cannot be read or processed (best-effort lookup).
    """
    filename = "coursedata/similar-code-files/" + language + "/level" + str(level) + ".csv"
    shortest_distance = 75  # This is the threshold: when differ more than this value it's no longer similar code
    similar_code = None
    try:
        with open(filename, mode='r', encoding='utf-8') as file:
            for row in csv.reader(file, quoting=csv.QUOTE_MINIMAL):
                # Blank or truncated rows carry no sample; skip them instead
                # of letting an IndexError throw away the whole search.
                if len(row) < 2:
                    continue
                distance = lev(processed_code, row[1])
                if distance < 1:  # The code is identical, no need to search any further
                    return row[0]
                if distance < shortest_distance:
                    shortest_distance = distance
                    similar_code = row[0]
    except Exception:
        # Deliberately best-effort: any failure means "no suggestion".
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        return None
    return similar_code

コード例 #4

0

ファイルを表示

def getCloserMatch(word, wordlist):
    """
    Search closest match (using Levenshtein distance)

    Args:
        word:           The word to search in the list.

        wordlist:       Path to the wordlist to perform the search into
                        (one candidate per line).

    Returns:
        A tuple ``(matches, min_distance)``: the list of all candidates that
        are at the smallest distance from ``word`` (in file order), and that
        distance. For an empty wordlist this is ``([], math.inf)``.

    Raises:
        SystemExit: With exit code 1, after printing a message, when the
            wordlist cannot be opened or read.

    Example (illustrative; the actual result depends on the wordlist file):
        >>> getCloserMatch("azerto", "wordlists/french_top1000.txt")  # doctest: +SKIP
        (['azerty', 'azert'], 1)
    """

    try:
        # The context manager closes the file even if read() fails.
        with open(wordlist, 'r') as f:
            passwords = f.read().splitlines()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: undecodable content
        # (UnicodeDecodeError) or an invalid path such as an embedded NUL.
        print("Could not find required file")
        raise SystemExit(1)

    min_distance = math.inf
    matches = []
    for password in passwords:
        distance = lev(word, password)
        if distance < min_distance:
            # Strictly better candidate: restart the list of matches.
            min_distance = distance
            matches = [password]
        elif distance == min_distance:
            matches.append(password)

    return matches, min_distance

コード例 #5

0

ファイルを表示

ファイル: speech_recognition.py プロジェクト: lukasvast/speech-signal-segments-classification-using-mfcc

            clean.append(poljeOrig[x - 1])

# Build the recognized-sound string from `clean`, dropping the "sil",
# "uzdah", "buka" and "greska" segments (Croatian labels; presumably silence,
# sigh, noise and error -- TODO confirm against the label set).
stringGlasova = ""
for x in range(len(clean)):
    if str(clean[x]) != "sil" and str(clean[x]) != "uzdah" and str(
            clean[x]) != "buka" and str(clean[x]) != "greska":
        stringGlasova = stringGlasova + clean[x]

# Print the original (stringLab) and the recognized (stringGlasova) strings.
print "\nISPIS ORIGINAL I PREPOZNATO"
print stringLab
print stringGlasova

# Compute the accuracy from the Levenshtein distance between the two strings.
edit_dist = lev(stringLab, stringGlasova)
print "\nPRECIZNOST"
# Report which distance option was selected ("M" or "E").
if odabirDistance == "M":
    print "KORISTECI MAHALANOBISOVU UDALJENOST"
elif odabirDistance == "E":
    print "KORISTECI EUKLIDOVU UDALJENOST"
# Accuracy in percent: 100 minus the edit distance relative to len(labData).
print str(100 - float(edit_dist) / len(labData) * 100) + "%"

# Format and write the result to the "<name>Out.lab" file under out/.
# NOTE(review): `start` is not used in the visible part of the script.
start = 0
# NOTE(review): under Python 2 this is integer division when both lengths are
# ints; the meaning of the 100000 factor is not visible here -- confirm.
distribution = len(mfcc_feat) * 100000 / len(clean)
# Opening in "w" mode and writing an empty string creates/truncates the
# output file; the following loop reopens it in append mode.
text_file = open("out/" + audioSignal.split(".")[0] + "Out.lab", "w")
text_file.write("")
text_file.close()
for x in range(len(clean)):
    text_file = open("out/" + audioSignal.split(".")[0] + "Out.lab", "a")

コード例 #6

0

ファイルを表示

ファイル: edit_dist.py プロジェクト: blueCormorant/Hiragana-G2P

#!/usr/bin/env python
from Levenshtein import distance as lev


def pairwise(iterable):
    """Group an iterable into consecutive, non-overlapping 2-tuples.

    ``[a, b, c, d]`` yields ``(a, b), (c, d)``. A trailing unpaired item is
    dropped. Returns a lazy ``zip`` iterator.
    """
    # The same iterator object appears twice, so zip() pulls two successive
    # items from it for every tuple it produces.
    shared = [iter(iterable)] * 2
    return zip(*shared)


# Collect the error lines from out.txt: every line that does not start with
# "--" is kept with its first two characters removed and whitespace stripped.
with open("out.txt", "r") as _file:
    errors = [line[2:].strip() for line in _file if not line.startswith("--")]

# Consecutive entries form (transcribed, gold) pairs; measure each pair.
distances = [lev(transcribed, gold) for transcribed, gold in pairwise(errors)]
_sum = sum(distances)

# Average over the pairs actually compared. This avoids a ZeroDivisionError
# when there are no error lines, and avoids dividing by a fractional count
# when an odd number of lines leaves the last entry unpaired.
pair_count = len(distances)
average = _sum / pair_count if pair_count else 0.0

msg = f"""Out of {pair_count} errors, the average edit distance was {average}"""
print(msg)

コード例 #7

0

ファイルを表示

def levenshtein(x, y, n=None):
    """Return the ``lev`` distance of ``x`` and ``y`` normalised by length.

    Args:
        x: First sequence.
        y: Second sequence.
        n: Optional prefix length; when given, only the first ``n`` items of
            each input are compared.

    Returns:
        ``lev(x, y)`` divided by the length of the longer (possibly truncated)
        input, or divided by 1 when both are empty.
    """
    if n is not None:
        x, y = x[:n], y[:n]
    distance = lev(x, y)
    # "or 1" guards the division when both inputs are empty.
    longest = max(len(x), len(y)) or 1
    return distance / longest

コード例 #8

0

ファイルを表示

def _chat_already_logged(color_name, message):
    """Return True if ChatLogArray already holds ``message`` for ``color_name``.

    A logged entry with the same color name counts as a duplicate when its
    message is identical or exactly one edit away according to ``lev``; a
    distance of 1 is usually an OCR error reading one character the wrong way.
    """
    for chat in ChatLogArray:
        if color_name != chat.colorName:
            continue
        if chat.message == message or lev(chat.message, message) == 1:
            return True
    return False


def _log_chat_column(frame, closing, frame_count, box_x, color_x):
    """Scan one pixel column for message boxes and log each new message.

    Args:
        frame: Original BGR frame; only used to sample the speaker's color.
        closing: Thresholded and closed single-channel image of ``frame``.
        frame_count: Frame index stored with each new ChatLog entry.
        box_x: Column that is probed for white (255) message-box pixels.
        color_x: Column at which the speaker's color is sampled.
    """
    chat_bottom = 750  # stop when the chat input box is reached
    text_left, text_right = 420, 1253  # horizontal extent of the text crops
    header_height = 50  # rows above the message text (name / color area)
    image_height = closing.shape[0]

    # because the iterator jumps past each box that is found, don't use a for loop
    y = 0  # start at the top
    while y < chat_bottom:
        y += 1

        # A message box is assumed when this pixel and the one 100 rows below
        # are both white.
        if closing[y, box_x] != 255 or closing[y + 100, box_x] != 255:
            continue

        # Follow the white run downwards to find the bottom of the box, but
        # never index past the last image row.
        end_y = y + 10
        while end_y < image_height and closing[end_y, box_x] == 255:
            end_y += 1

        crop_img_text = closing[y + header_height:end_y - 2, text_left:text_right]
        if crop_img_text.size == 0:
            print("failure in grabbing text from chat")
            y = end_y + 10
            continue

        OCR_result = get_text_from_image(crop_img_text)

        # get the color values of the player saying the word, then the color
        # name based on those values
        b, g, r = frame[y + header_height, color_x]
        color_name = utils.get_color_name(r, g, b)

        # if the chat entry doesn't exist yet, create it (prevents duplicates)
        if not _chat_already_logged(color_name, OCR_result):
            # now get the name through OCR
            crop_img = closing[y:y + header_height, text_left:text_right]
            OCR_result_name = get_text_from_image(crop_img)

            # now put it all in the chat logger
            new_log = ChatLog(len(ChatLogArray), r, g, b, color_name, frame_count, OCR_result_name, OCR_result)
            ChatLogArray.append(new_log)

        y = end_y + 10


def extract_text_chat_screen(frame, frame_count=0, include_player_messages=True):
    """Extract chat messages from a chat-screen frame into ChatLogArray.

    The frame is converted to grayscale, thresholded at 245 and morphologically
    closed. The result is scanned for message boxes, first in the column used
    by other players' messages and then, optionally, in the column used by the
    local player's own messages. Every message that is not already logged is
    appended to the module-level ``ChatLogArray`` as a ``ChatLog``.

    The pixel coordinates are those the original code notes as valid for a
    1920 x 1080 frame.

    Args:
        frame: BGR image of the chat screen.
        frame_count: Frame index recorded with each new log entry.
        include_player_messages: Whether messages from the person playing the
            game should also be logged.

    Returns:
        None. Results are appended to ``ChatLogArray``.
    """
    img = frame.copy()

    # convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # threshold
    _, binary = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY)

    # closing
    kernel = np.ones((3, 3), np.uint8)
    closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    # messages from the other players
    other_x = 288  # in the case of 1920 x 1080
    _log_chat_column(frame, closing, frame_count, box_x=other_x, color_x=other_x + 100)

    # now do the same for the player's chat messages.
    if include_player_messages:
        player_x = 1375  # player messages in the case of 1920 x 1080
        _log_chat_column(frame, closing, frame_count, box_x=player_x, color_x=player_x - 40)