Python SequenceMatcher.SequenceMatcher Exemples, difflib.SequenceMatcher.SequenceMatcher Python Exemples

Exemple #1

0

Afficher le fichier

def _longest_match_ratio(str1, str2):
    """Return the longest common block size relative to the shorter input.

    A ``SequenceMatcher`` that treats the space character as junk is used to
    find the longest matching block between ``str1`` and ``str2``. The size
    of that block is divided by the length of the shorter string via
    ``np_utils._try_divide``.
    """
    # NOTE(review): presumably _try_divide guards against a zero
    # denominator (an empty input) -- confirm against np_utils.
    matcher = SequenceMatcher(isjunk=lambda ch: ch == " ", a=str1, b=str2)
    len1, len2 = len(str1), len(str2)
    longest = matcher.find_longest_match(0, len1, 0, len2)
    shorter_len = min(len1, len2)
    return np_utils._try_divide(longest.size, shorter_len)

Exemple #2

0

Afficher le fichier

# Manipulating date/time
# NOTE: this snippet uses Python 2 print statements.
dt = datetime.now()

print dt.strftime('%m - %d - %Y :: ')
print dt.strftime('%x %X')
# NOTE(review): exit() stops the script here, so everything below is
# unreachable as written -- presumably left in while experimenting; confirm.
exit()

# Hacking difflib
# stud = '1**2*'
# our = '1*2'
# Two renderings of a star triangle: `our` without padding, `stud` with
# spaces between and before the stars.
our = "*\n**\n***\n****\n"
stud = "      *\n    * *\n  * * *\n* * * *\n"

# NOTE(review): hard-coded bonus added to the match count below; presumably
# an estimate of the space characters to be counted as matched -- confirm.
space_match = 36

# Spaces are declared junk for the matcher.
s = SequenceMatcher(lambda x: x == " ", stud, our)

# ratio() is 2*M/T, so ratio * T recovers twice the number of matched
# characters; the bonus is added to that and the ratio is recomputed.
def_ratio = s.ratio()
comb_len = len(stud) + len(our)
def_match = def_ratio * comb_len
new_match = def_match + space_match
new_ratio = new_match / comb_len
print "space match : ", space_match
print "old ratio : ", def_ratio
print "new ratio : ", new_ratio

print our
print stud
# exit()

# Opened for reading (default mode); the handle is not closed in this excerpt.
out_ref = open('out_ref.txt')

Exemple #3

0

Afficher le fichier

Fichier : searchCitations.py Projet : cnettlev/paper-citations

def compareTitles(title1, title2, opt=options.matcher):
    """Decide whether two titles should be treated as the same.

    When ``opt`` is below 1 it acts as a similarity threshold: the titles
    match if their ``SequenceMatcher`` ratio is strictly greater than
    ``opt``. Otherwise the titles must be exactly equal.
    """
    if not opt < 1:
        return title1 == title2
    similarity = SequenceMatcher(None, title1, title2).ratio()
    return similarity > opt

Exemple #4

0

Afficher le fichier

Fichier : main.py Projet : rnetonet/cryptofun

def avalanche(a, b):
    """Return how different ``a`` and ``b`` are, as a percentage.

    The result is 100 minus the ``SequenceMatcher`` similarity ratio scaled
    to a percentage: 0.0 for identical sequences, 100.0 when nothing matches.
    """
    similarity_pct = SequenceMatcher(None, a, b).ratio() * 100
    return 100.0 - similarity_pct

Exemple #5

0

Afficher le fichier

Fichier : match_dataframes.py Projet : Ahmad-Bamba/pyPDF-OCR

def get_similarity(l1, l2):
    """Return the ``SequenceMatcher`` similarity ratio of ``l1`` and ``l2``.

    The ratio is a float in [0, 1]; no junk heuristic function is supplied.
    """
    matcher = SequenceMatcher(None, l1, l2)
    return matcher.ratio()

Exemple #6

0

Afficher le fichier

def similar(string_A, string_B):
    """Return the similarity ratio (0..1) between the two strings."""
    comparison = SequenceMatcher(isjunk=None, a=string_A, b=string_B)
    return comparison.ratio()

Exemple #7

0

Afficher le fichier

def getSimilarRatio(str1, str2):
    """Return the ``difflib`` similarity ratio of ``str1`` and ``str2``."""
    ratio = SequenceMatcher(None, str1, str2).ratio()
    return ratio

Exemple #8

0

Afficher le fichier

 def _get_patterns_similarity(pattern_a, pattern_b):
     similarity = SequenceMatcher(None, pattern_a, pattern_b).ratio()
     return round(similarity, 2)

Exemple #9

0

Afficher le fichier

Fichier : compare.py Projet : KeepSafe/html-structure-diff

def _diff_ranges(seq1, seq2):
    opcodes = SequenceMatcher(a=seq1, b=seq2, autojunk=False).get_opcodes()
    return list(filter(lambda i: i[0] != 'equal', opcodes))

Exemple #10

0

Afficher le fichier

Fichier : TALinks.py Projet : fstien/UndergradDissertation

def compare(a, b):
    """Return the ``SequenceMatcher`` similarity ratio between ``a`` and ``b``."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

Exemple #11

0

Afficher le fichier

# Run the TF model once, in inference mode, on a random float32 tensor.
# NOTE(review): presumably this forward pass is needed so the model's
# variables exist before they are read below -- confirm.
dummy_input = tf.random.uniform((7, 80, 64), dtype=tf.float32)
mel_pred = model_tf(dummy_input, training=False)

# get tf variables
tf_vars = model_tf.weights

# match variable names with fuzzy logic
torch_var_names = list(state_dict.keys())
tf_var_names = [we.name for we in model_tf.weights]
# Accumulates (tf_name, torch_name) pairs.
var_map = []
for tf_name in tf_var_names:
    # skip re-mapped layer names
    if tf_name in [name[0] for name in var_map]:
        continue
    tf_name_edited = convert_tf_name(tf_name)
    # Score every remaining torch name against the converted TF name and
    # take the most similar one.
    ratios = [SequenceMatcher(None, torch_name, tf_name_edited).ratio() for torch_name in torch_var_names]
    max_idx = np.argmax(ratios)
    matching_name = torch_var_names[max_idx]
    # Remove the chosen torch name so it cannot be matched a second time.
    del torch_var_names[max_idx]
    var_map.append((tf_name, matching_name))

# pass weights
tf_vars = transfer_weights_torch_to_tf(tf_vars, dict(var_map), state_dict)

# Compare TF and TORCH models
# check embedding outputs
model.eval()
dummy_input_torch = torch.ones((1, 80, 10))
# Convert the torch input to a TF tensor, swap its last two axes, then
# insert a new axis at position 2.
dummy_input_tf = tf.convert_to_tensor(dummy_input_torch.numpy())
dummy_input_tf = tf.transpose(dummy_input_tf, perm=[0, 2, 1])
dummy_input_tf = tf.expand_dims(dummy_input_tf, 2)

Exemple #12

0

Afficher le fichier

 wordList_2 = []  # creates an empty list
 # Opened in append mode; the handle is not closed within this excerpt.
 f = open("checkMe2.txt", "a")
 for word_1 in sentence_2.split(
 ):  # loops through every whitespace-separated word of sentence_2
     if word_1 in text.split(
     ):  # the word appears among the words of `text`
         f.write(
             word_1 + " "
         )  # then append the word to the text file "checkMe2.txt"
         correct_2 += 1
     elif word_1 not in text.split():  # the word is not among the words of `text`
         incorrect_2 += 1
         for i in range(
                 0, 84094
         ):  # NOTE(review): 84094 is presumably the number of entries in `alt` -- confirm
             score1 = SequenceMatcher(None, word_1, alt[i]).ratio(
             )  # similarity ratio between the unknown word and alt[i]
             if score1 >= 0.75:  # alt[i] is similar enough (ratio of at least 0.75)
                 print(
                     "\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500"
                     "\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500"
                     "\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510"
                 )
                 print(
                     "\u2502          W O R D  N O T  F O U N D      \u2502"
                 )
                 print(
                     "\u2502                                         \u2502"
                 )
                 # The word is padded with spaces up to a fixed width.
                 print("\u2502 " + word_1,
                       (" " * (23 - len(word_1))),
                       "               \u2502")

Exemple #13

0

Afficher le fichier

def transfer_casing_for_similar_text(text_w_casing, text_wo_casing):
    """Transfer the casing from one text to another, similar (not
    necessarily matching) text.

    1. ``difflib``'s ``SequenceMatcher`` is used to identify the changes
       needed to turn the lowercased `text_w_casing` into `text_wo_casing`.
    2. For each type of change:

       - inserted sections take the casing of the prior character. If
         there is no prior character, or the prior character is a space,
         the casing of the following character is used instead. If there
         is no following character either (insertion at the very end,
         right after a space), the inserted text is lowercased.
       - deleted sections: no case transfer is required.
       - equal sections: the original text (with its casing) is copied.
       - replaced sections: if both sides have the same length, the casing
         is transferred with :meth:`transfer_casing_for_matching_text`;
         otherwise it is transferred character by character and the last
         casing seen is carried over to any additional characters.

    Parameters
    ----------
    text_w_casing : str
        Text with varied casing
    text_wo_casing : str
        Text that is in lowercase only

    Returns
    -------
    text_wo_casing : str
        If `text_wo_casing` is empty
    c : str
        Text with the content of `text_wo_casing` but the casing of
        `text_w_casing`

    Raises
    ------
    ValueError
        If `text_w_casing` is empty
    """
    if not text_wo_casing:
        return text_wo_casing

    if not text_w_casing:
        raise ValueError("We need 'text_w_casing' to know what "
                         "casing to transfer!")

    # NOTE(review): opcode indices refer to text_w_casing.lower(); they are
    # applied to text_w_casing itself, which assumes lowercasing does not
    # change the string's length -- confirm for non-ASCII input.
    _sm = SequenceMatcher(None, text_w_casing.lower(), text_wo_casing)

    # Pieces of the case-transferred text, joined once at the end.
    pieces = []

    for tag, i1, i2, j1, j2 in _sm.get_opcodes():
        if tag == "insert":
            inserted = text_wo_casing[j1:j2]
            if i1 == 0 or text_w_casing[i1 - 1] == " ":
                # No usable character on the left: look at the following
                # character. The bounds check covers an insertion at the
                # very end of the text, where there is no following
                # character (this used to raise IndexError).
                use_upper = (i1 < len(text_w_casing)
                             and text_w_casing[i1].isupper())
            else:
                # Otherwise take the casing from the prior character.
                use_upper = text_w_casing[i1 - 1].isupper()
            pieces.append(inserted.upper() if use_upper else inserted.lower())

        elif tag == "equal":
            # Equal apart from casing, so copy the cased original.
            pieces.append(text_w_casing[i1:i2])

        elif tag == "replace":
            _w_casing = text_w_casing[i1:i2]
            _wo_casing = text_wo_casing[j1:j2]

            if len(_w_casing) == len(_wo_casing):
                # Same length: a straight positional transfer.
                pieces.append(transfer_casing_for_matching_text(
                    text_w_casing=_w_casing, text_wo_casing=_wo_casing))
            else:
                # Different lengths: go character by character and keep
                # using the last casing once the cased side runs out.
                last_upper = False
                for w, wo in zip_longest(_w_casing, _wo_casing):
                    if wo is None:
                        # Extra cased characters have nothing to map to.
                        continue
                    if w is not None:
                        last_upper = w.isupper()
                    pieces.append(wo.upper() if last_upper else wo.lower())

        # "delete": nothing to emit for removed characters.

    return "".join(pieces)

Exemple #14

0

Afficher le fichier

Fichier : utils.py Projet : jman12343/AIuntested

def get_similarity(a, b):
    """Return the ``SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    matcher = SequenceMatcher(None, a, b)
    similarity = matcher.ratio()
    return similarity

Exemple #15

0

Afficher le fichier

Fichier : fuzzy_join.py Projet : giebs29/helpful-pyscripts

def compare_values(a, b):
    """Return the similarity ratio of two strings, ignoring case and
    leading/trailing whitespace.
    """
    left = a.lower().strip()
    right = b.lower().strip()
    return SequenceMatcher(None, left, right).ratio()

Exemple #16

0

Afficher le fichier

def similar(a, b):
    """Return a float in [0, 1] representing the similarity of the two strings."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

Exemple #17

0

Afficher le fichier

Fichier : bag_info.py Projet : dpatters97/summer2019

def similar(a, b):
    """Return the case-insensitive similarity ratio of ``a`` and ``b``."""
    lowered_a = a.lower()
    lowered_b = b.lower()
    return SequenceMatcher(None, lowered_a, lowered_b).ratio()

Exemple #18

0

Afficher le fichier

Fichier : utils.py Projet : WillKoehrsen/wikirec

def _check_str_similarity(str_1, str_2):
    """Checks the similarity of two strings"""
    return SequenceMatcher(None, str_1, str_2).ratio()

Exemple #19

0

Afficher le fichier

Fichier : throne_cmds.py Projet : saranshgupta1995/The-Iron-Throne

def areSimilar(a, b):
    """Return True when the similarity ratio of ``a`` and ``b`` exceeds 0.8.

    The comparison is strict: a ratio of exactly 0.8 is not similar.
    """
    score = SequenceMatcher(None, a, b).ratio()
    return score > 0.8

Exemple #20

0

Afficher le fichier

Fichier : typing_game.py Projet : yudisupriyadi123/typing_game

def typing_game():
    """Run one interactive round of the typing game.

    Asks whether to start. On 'y' (case-insensitive) a countdown is shown,
    a sentence from ``normal_sentences()`` is presented, and the player's
    typing time, words-per-minute and accuracy are reported. An exact match
    is recorded with ``add_score``; otherwise the ``SequenceMatcher``
    similarity is shown as the accuracy. Every path ends by calling
    ``main_menu()``.
    """
    game_start = str(input("\nStart Game? (Y/N): ")).lower()

    # Anything other than 'y' goes straight back to the menu.
    if game_start != 'y':
        if game_start in ('N', 'n'):
            print("\nReturning to the Main Menu\n")
            t.sleep(1)
        else:
            print("Returning to the Main Menu")
        main_menu()
        return

    # Countdown before the sentence is shown.
    print("\nGet ready...\n")
    t.sleep(1)
    print("Get Set...\n")
    t.sleep(1)
    print("Go!\n")
    t.sleep(0.5)

    random_sentence = normal_sentences()
    number_of_words = len(random_sentence.split())
    time_start = dt.datetime.now()
    prompt = "Your sentence is: \n" + '"' + random_sentence + '"' + '\n'
    player_input = input(prompt)

    perfect = player_input == random_sentence
    if not perfect:
        # Character lists compared by SequenceMatcher further down.
        player_input_arr = list(player_input)
        random_sentence_arr = list(random_sentence)

    # Timing report shared by both outcomes.
    time_end = dt.datetime.now()
    print("Complete!\n")
    t.sleep(0.5)
    elapsed = time_end - time_start
    time_seconds = round(elapsed.total_seconds(), 2)
    print("Your total time was", time_seconds, 'seconds!\n')
    wpm = float(round(((60 / time_seconds) * number_of_words), 2))

    if perfect:
        print("You have a typing speed of", wpm, "WPM\n")
        accuracy = 100
        print("You have an accuracy of", accuracy, '%\n')
        name = input("What is your name?: ")
        print("Added to high scores list!")
        t.sleep(1)
        add_score(name, wpm)
    else:
        matcher = SequenceMatcher(None, player_input_arr, random_sentence_arr)
        similarity = round((matcher.ratio() * 100), 2)
        print("You typed the sentence with ", similarity, " % accuracy!")
        print("\nYou have a typing speed of", wpm, "WPM\n")
        print("\nGet 100% accuracy to be added to the high score list!")

    main_menu()

Exemple #21

0

Afficher le fichier

Fichier : missing_dependency_finder.py Projet : thoward/pants

 def sort(self, strings):
     """Return ``strings`` ordered from most to least similar to
     ``self._base_str``, using the ``SequenceMatcher`` ratio as the key.
     """
     def closeness(candidate):
         return SequenceMatcher(a=self._base_str, b=candidate).ratio()

     return sorted(strings, key=closeness, reverse=True)

Exemple #22

0

Afficher le fichier

Fichier : scriptextest.py Projet : stratzilla/SCRiPTeX

def _trim_wrapper(text):
    """Drop the first 70 and last 14 characters of texts of 84+ characters."""
    # NOTE(review): 70/14 are presumably the sizes of a fixed document
    # header and footer around the interesting content -- confirm.
    if len(text) >= 84:
        return text[70:len(text) - 14]
    return text


def _strip_commands(text):
    """Split ``text`` into (text without backslash commands, the commands).

    A command starts at a backslash and runs up to (not including) the next
    space or newline. The command characters are removed from the text and
    collected, and a "1" placeholder is inserted in front of the whitespace
    that ended the command.
    """
    kept = []
    commands = []
    in_command = False
    for ch in text:
        if ch == '\\':
            commands.append(ch)
            in_command = True
        elif in_command and ch in (' ', '\n'):
            in_command = False
            kept.append('1')
            kept.append(ch)
        elif in_command:
            commands.append(ch)
        else:
            kept.append(ch)
    return ''.join(kept), ''.join(commands)


def file_compare(outfile, expected, resultsfile,
                 results):  #file compare function
    """Score how closely ``outfile`` matches ``expected`` and report it.

    Both files are read and trimmed (see ``_trim_wrapper``), and their
    backslash commands are separated from the remaining text. The score
    combines the ``SequenceMatcher`` ratio of the remaining texts with the
    share of positionally identical commands. The score is printed as a
    percentage, and the working directory is then changed to the parent
    directory.

    Parameters
    ----------
    outfile : str
        Path of the produced file.
    expected : str
        Path of the reference file.
    resultsfile
        Ignored; kept for backward compatibility (the original rebound it
        before use).
    results : bool
        When equal to ``True``, the score line is also appended to
        ``results.txt`` in the (new) current working directory.

    Raises
    ------
    ZeroDivisionError
        If the computed original length is zero (e.g. both files empty).
    """
    with open(expected) as handle:
        comp = handle.read()
    with open(outfile) as handle:
        output = handle.read()

    comp = _trim_wrapper(comp)
    output = _trim_wrapper(output)

    # Lengths after trimming but before the commands are stripped.
    complength = len(comp)
    outlength = len(output)

    output, thing = _strip_commands(output)
    comp, thing2 = _strip_commands(comp)

    splitthing = thing.split("\\")
    splitthing2 = thing2.split("\\")

    # Count commands that are identical at the same position in both files
    # (zip stops at the shorter list).
    i = sum(
        1 for left, right in zip(splitthing, splitthing2)
        if SequenceMatcher(None, left, right).ratio() == 1)

    if len(output) > len(comp):
        # The original referenced an undefined name `thing1` here, which
        # raised NameError whenever the output was the longer text.
        originallen = outlength - len(thing) + (len(splitthing) - 1)
        biggestlength = len(output)
    else:
        originallen = complength - len(thing2) + (len(splitthing2) - 1)
        biggestlength = len(comp)
    # The split always yields a leading empty piece that matches on both
    # sides, hence the "- 1".
    symbolratio = (i - 1) / originallen

    per = (SequenceMatcher(None, output, comp).ratio() *
           (biggestlength / originallen) + symbolratio) * 100
    message = 'The output was ' + str('{0:.2f}'.format(per)) + '% accurate\n'
    print(message)
    os.chdir('../')
    # Kept as an equality test so truthy non-True values behave as before.
    if results == True:
        with open(os.getcwd() + '/results.txt', 'a') as report:
            report.write(message)

Exemple #23

0

Afficher le fichier

Fichier : match_dataframes.py Projet : Ahmad-Bamba/pyPDF-OCR

def are_similar(str1, str2):
    """Return True when the similarity ratio of the two strings is at least
    the module-level ``THRESHOLD``.
    """
    # THRESHOLD is only read here, so no ``global`` declaration is needed.
    similarity = SequenceMatcher(None, str1, str2).ratio()
    return similarity >= THRESHOLD

Exemple #24

0

Afficher le fichier

Fichier : baja2.py Projet : pulkitkomal/Auribises_Practice

import pandas as pd
from difflib import SequenceMatcher

# Load the player table and ask which player to look up.
df = pd.read_csv('players.csv')
z = input('Enter name of player: ')

# (label, column) pairs printed, in this order, for every matching player.
report_lines = (
    ('Total number of matches played in Test', 'TestM'),
    ('Total runs in Test', 'TestRuns'),
    ('Bat avg in Test', 'TestBat Avg'),
    ('Wickets in Test', 'TestWkts'),
    ('Bowl avg in Test', 'TestBowl Avg'),
    ('Total number of matches played in ODI', 'ODIM'),
    ('Total runs in ODI', 'ODIRuns'),
    ('Bat avg in ODI', 'ODIBat Avg'),
    ('Wickets in ODI', 'ODIWkts'),
    ('Bowl avg in ODI', 'ODIBowl Avg'),
    ('Total number of matches played in T20', 'T20M'),
    ('Total runs in T20', 'T20Runs'),
    ('Bat avg in T20', 'T20Bat Avg'),
    ('Wickets in T20', 'T20Wkts'),
    ('Bowl avg in T20', 'T20Bowl Avg'),
)

for x in range(len(df['Name'])):
    # Asterisks in the stored name are replaced by spaces before comparing.
    cleaned_name = df['Name'][x].replace('*', ' ')
    # Skip players whose name is less than 50% similar to the query.
    if SequenceMatcher(None, cleaned_name, z).ratio() < 0.5:
        continue
    print('Name: ', df['Name'][x])
    for label, column in report_lines:
        print(label, df[column][x])

Exemple #25

0

Afficher le fichier

Fichier : validate.py Projet : seasidesparrow/ADSMasterPipeline

    def fields_match(self, bibcode, s1, s2, field):
        """Compare one field of a record between the old and new database.

        Parameters
        ----------
        bibcode
            Identifier of the record, used only in log messages.
        s1, s2 : mapping
            The old (``s1``) and new (``s2``) record.
        field : str
            Name of the field to compare.

        Returns
        -------
        bool or str
            ``True`` when the values match under the field-specific rules
            below, ``False`` when they differ, or a short string naming the
            reason when the field is missing from one or both records.

        Field-specific rules: ``citation`` and ``identifier`` are compared
        after sorting; ``citation_count`` may differ by up to 3;
        ``cite_read_boost`` may differ by up to 10% of the old value unless
        exactly one side is zero; ``data`` tolerates ``CDS`` in the old
        record becoming ``SIMBAD`` in the new one; ``doctype`` tolerates
        ``intechreport`` becoming ``techreport``; ``reference`` only
        compares lengths. Any other differing string values are accepted
        when their ``SequenceMatcher`` ratio is at least 0.8.

        The input records are never modified. ``Logger.warning`` is used in
        place of the deprecated ``Logger.warn`` alias.
        """
        in_old = field in s1
        in_new = field in s2

        if not in_old and not in_new:
            if field in self.new_fields:
                self.logger.warning('Bibcode {}: required new field {} not present'.format(bibcode, field))
                return 'required new field not in bibcode'
            self.logger.info('Bibcode {}: field {} not present in either database'.format(bibcode, field))
            return 'field not in bibcode'
        if not in_old:
            self.logger.info('Bibcode {}: field {} not present in old database'.format(bibcode, field))
            return 'field not in s1'
        if not in_new:
            self.logger.info('Bibcode {}: field {} not present in new database'.format(bibcode, field))
            return 'field not in s2'

        f1 = s1[field]
        f2 = s2[field]

        # for citations, sort and compare the lists
        if field == 'citation':
            if sorted(f1) != sorted(f2):
                self.logger.warning('Bibcode {}: different numbers of citations present in each database'.format(bibcode))
                return False
            return True

        # allow citation_count to be different by up to 3
        if field == 'citation_count':
            if abs(f1 - f2) > 3:
                self.logger.warning(
                    'Bibcode {}: citation_count field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
                return False
            return True

        # allow cite_read_boost to differ by up to 10%, unless one field is 0 and the other is non-zero
        if field == 'cite_read_boost':
            if (f1 == 0.) and (f2 == 0.):
                return True
            if (f1 == 0.) or (f2 == 0.):
                # exactly one side is zero (both-zero was handled above)
                self.logger.warning(
                    'Bibcode {}: cite_read_boost field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
                return False
            if (abs(f1 - f2) / f1) > 0.1:
                self.logger.warning(
                    'Bibcode {}: cite_read_boost field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
                return False
            return True

        # CDS has changed to SIMBAD in new pipeline; check for this. Then check the rest of the sorted list
        if field == 'data':
            if ('CDS' in f1) and ('SIMBAD' in f2):
                # Work on copies: removing from the originals would alter
                # the records passed in by the caller.
                f1 = list(f1)
                f2 = list(f2)
                f1.remove('CDS')
                f2.remove('SIMBAD')
            if sorted(f1) != sorted(f2):
                self.logger.warning(
                    'Bibcode {}: data field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
                return False
            return True

        # doctype intechreport has been changed to techreport
        if (field == 'doctype') and (f1 == 'intechreport') and (f2 == 'techreport'):
            return True

        # for identifier, sort first before comparing, since the order has changed
        if field == 'identifier':
            if sorted(f1) != sorted(f2):
                self.logger.warning('Bibcode {}: identifier field is different between databases. Old: {} New: {}'.format(bibcode, f1, f2))
                return False
            return True

        # for references, only check that the total number is the same (otherwise sorting
        # differences can confuse it)
        if field == 'reference':
            if len(f1) != len(f2):
                self.logger.warning(
                        'Bibcode {}: different numbers of references present in each database'.format(bibcode))
                return False
            return True

        if f1 != f2:
            # check how similar strings are; `unicode` is only evaluated on
            # Python 2 thanks to the short-circuiting version check
            if isinstance(f1, str) or (sys.version_info < (3,) and isinstance(f1, unicode)):
                ratio = SequenceMatcher(None, f1, f2).ratio()
                if ratio < 0.8:
                    # the body text is left out of the message
                    if field == 'body':
                        self.logger.warning(
                            'Bibcode %s: unicode field %s is different between databases.', bibcode, field,)
                    else:
                        self.logger.warning('Bibcode %s: unicode field %s is different between databases. Old: %r New: %r', bibcode, field, f1, f2)
                    return False
                # similar enough: log it, but still treat it as a match
                if field == 'body':
                    self.logger.info(
                        'Bibcode %s: unicode field %s is slightly different between databases.',
                        bibcode, field)
                else:
                    self.logger.info('Bibcode %s: unicode field %s is slightly different between databases. Old: %r New: %r', bibcode, field, f1, f2)
            else:
                self.logger.warning('Bibcode {}: field {} is different between databases. Old: {} New: {}'.format(bibcode, field, f1, f2))
                return False

        return True

Exemple #26

0

Afficher le fichier

Fichier : python-cheatsheet.py Projet : slerkpatomsak/scripts

def roundall(numbers):
    """Round every number and convert the result to ``int``.

    The result is whatever ``map`` returns in the running Python version
    (a list on Python 2, a lazy iterator on Python 3).
    """
    rounded = map(round, numbers)
    return map(int, rounded)

#
# Fuzzy string match.
# SequenceMatcher's first argument is a function that returns true for
# characters considered to be "junk". For instance, if blanks are junk,
# lambda x: x == " "
# To consider nothing as junk, pass None.
#
from difflib import SequenceMatcher

# Track the candidate most similar to `matchname`; the first candidate wins
# ties because the comparison below is strict.
best_ratio, best_match = -1, None
for candidate in string_list:
    score = SequenceMatcher(None, matchname, candidate).ratio()
    if score > best_ratio:
        best_match, best_ratio = candidate, score

#
# sorting + lambda examples.
#
# The cmp function is obsolete.
# Instead, use a key function,
# which is called on each element of the list prior to sorting.
# https://wiki.python.org/moin/HowTo/Sorting
#
def sort_by_last_letter(words):
    """Print ``words`` sorted by the last letter of each word.

    ``sorted()`` returns a new list, so ``words`` itself is left untouched.
    The key is the slice ``a[-1:]`` rather than ``a[-1]`` so that an empty
    string sorts first instead of raising IndexError.
    """
    # A single parenthesised argument prints the same on Python 2 and 3.
    print(sorted(words, key=lambda a: a[-1:]))

Exemple #27

0

Afficher le fichier

def similar(a, b):
    """Return the ``SequenceMatcher`` similarity ratio (0..1) of ``a`` and ``b``."""
    ratio = SequenceMatcher(None, a, b).ratio()
    return ratio

Exemple #28

0

Afficher le fichier

Fichier : main.py Projet : HuimingCheng/AutoGrading

def similar(a, b):
    """Return the similarity ratio of ``a`` and ``b``, ignoring case and
    every space character.
    """
    def _normalize(text):
        return text.replace(" ", "").lower()

    return SequenceMatcher(None, _normalize(a), _normalize(b)).ratio()

Exemple #29

0

Afficher le fichier

# Command-line arguments: the CSV file to read and the word to look up.
file_name = sys.argv[1]
mispelled_word = sys.argv[2]
# For a quick manual run, e.g.: file_name="file2.csv", mispelled_word="you"

print("Contents of file : ", file_name)
a = []
with open(file_name, 'r') as handle:
    for row in csv.reader(handle):
        print(", ".join(str(cell) for cell in row))
        a.append(row)

# Pair the first column of every row with its similarity to the input word.
suggestions = [
    [row[0], SequenceMatcher(None, mispelled_word, row[0]).ratio()]
    for row in a
]

# Most similar first; keep at most the five best candidates.
suggestions.sort(key=lambda x: x[1], reverse=True)
topsuggestions = [entry[0] for entry in suggestions[:5]]

print("\nTop Suggestions :")
print(", ".join(str(word) for word in topsuggestions))

Exemple #30

0

Afficher le fichier

def _longest_match_size(str1, str2):
    sq = SequenceMatcher(lambda x: x == " ", str1, str2)
    match = sq.find_longest_match(0, len(str1), 0, len(str2))
    return match.size