def lev(request):
    """Answer a request with the edit distance between two words, as JSON.

    The words are read from the ``original_word`` and ``new_word`` GET
    parameters of ``request``. When both are non-empty, the JSON body holds
    ``status: success`` and the ``distance`` computed by
    ``pylev.levenschtein``. Otherwise it holds ``status: error`` together
    with a usage message and a description of the service.

    Returns a ``Response`` whose content type is ``application/json``.
    """
    source_word = request.GET.get('original_word', '')
    target_word = request.GET.get('new_word', '')

    if len(source_word) != 0 and len(target_word) != 0:
        payload = {
            'status': 'success',
            'distance': pylev.levenschtein(source_word, target_word),
        }
    else:
        # A missing parameter defaults to '' and is rejected here as well.
        payload = {
            'status': 'error',
            'message': "You must supply both 'original_word' & 'new_word' as GET params.",
            'description': """It's Levenshtein-As-A-Service. You know, for the lulz. Please direct all VC monies to Daniel Lindsley.""",
        }

    body = json.dumps(payload)
    return Response(body, content_type='application/json')
def traverseDir(self, root):
    """Walk ``root`` and yield, per directory, the .jpg closest to the movie.

    A directory qualifies when it contains a file whose extension appears in
    the module-level ``mov_types``, does not contain ``folder.jpg``, and has
    at least one ``.jpg`` file. Its ``.jpg`` names are ranked by
    ``pylev.levenschtein`` distance to the movie's file name, smallest
    first.

    Side effects:
        ``self.mov.Text`` is assigned the movie name (or ``None``) for every
        directory visited. For each qualifying directory ``self.loc.Text``
        is set to the directory path and ``self.lv.Items`` is cleared and
        refilled with the ranked names.

    Yields:
        ``"<directory>/<best .jpg name>"`` for each qualifying directory.
    """
    def is_movie(name):
        # True when the file's extension equals one of the mov_types entries.
        extension = os.path.splitext(name)[1]
        return any(extension == mov_type for mov_type in mov_types)

    for dirName, _subdirList, fileList in os.walk(root):
        # Only the first matching file is used; directories holding two or
        # more movies get no special treatment.
        mov = next((name for name in fileList if is_movie(name)), None)
        # Assigned before the None check, so directories without a movie
        # update the field too.
        self.mov.Text = mov
        if mov is None:
            continue
        if "folder.jpg" in fileList:
            continue

        # Build a real list: on Python 3 filter() is lazy, so comparing its
        # result with [] never detects "no .jpg files" and the indexing
        # below would raise IndexError.
        jpgNames = [
            name for name in fileList
            if os.path.splitext(name)[1] == ".jpg"
        ]
        if not jpgNames:
            continue

        ranked = sorted(
            jpgNames, key=lambda name: pylev.levenschtein(name, mov)
        )

        self.lv.Items.Clear()
        self.loc.Text = dirName
        # Explicit loop: map() used for side effects never runs on Python 3.
        for name in ranked:
            self.lv.Items.Add(name)

        yield "%s/%s" % (dirName, ranked[0])
def traverseDir(self, root):
    """Yield one best-matching .jpg path for each movie directory in ``root``.

    Every directory produced by ``os.walk(root)`` is inspected. It is
    skipped when it holds no file with an extension from the module-level
    ``mov_types``, when it already holds ``folder.jpg``, or when it holds no
    ``.jpg`` file at all. Otherwise the ``.jpg`` names are sorted by their
    ``pylev.levenschtein`` distance to the movie's file name.

    Side effects:
        ``self.mov.Text`` is set to the movie name (or ``None``) for each
        directory visited. For each directory that is not skipped,
        ``self.loc.Text`` is set to its path and ``self.lv.Items`` is
        cleared and repopulated with the sorted names.

    Yields:
        ``'<directory>/<closest .jpg name>'`` for each directory that is
        not skipped.
    """
    def has_movie_extension(filename):
        # Compare the extension against each configured movie type.
        ext = os.path.splitext(filename)[1]
        return any(ext == mov_type for mov_type in mov_types)

    for dirName, _subdirList, fileList in os.walk(root):
        # First match wins; two or more movies in one directory are not
        # handled specially.
        mov = next((f for f in fileList if has_movie_extension(f)), None)
        # Set before the None check so the field also changes for
        # directories that contain no movie.
        self.mov.Text = mov
        if mov is None:
            continue
        if 'folder.jpg' in fileList:
            continue

        # A list comprehension instead of filter(): Python 3's filter()
        # returns an iterator, which never equals [] and would let an empty
        # result fall through to the [0] lookup below.
        candidates = [
            f for f in fileList if os.path.splitext(f)[1] == '.jpg'
        ]
        if not candidates:
            continue

        candidates.sort(key=lambda f: pylev.levenschtein(f, mov))

        self.lv.Items.Clear()
        self.loc.Text = dirName
        # A plain loop instead of map(): map() is lazy on Python 3, so the
        # Add calls would never execute.
        for candidate in candidates:
            self.lv.Items.Add(candidate)

        yield '%s/%s' % (dirName, candidates[0])
i = 0 lev_max = 1 levenstein_distance = [] for idx, row in data_train.iterrows(): provider_current = row['provider_lower'] count_current = row['count'] data_train_current = data_train[data_train['provider_lower'] != row['provider_lower']]['provider_lower'] closest_word = difflib.get_close_matches(provider_current, data_train_current, n=1, cutoff=0.6) if len(closest_word) > 0: match = closest_word[0] distance = levenschtein(provider_current, match) if distance <= lev_max: count_closest = data_train[data_train['provider_lower'] == match]['count'].item() if count_closest > count_current: #print "replace " + provider_current + " by " + match + " ( " + str(count_current) + " , " + str(count_closest) + ")" idx = data_train[data_train['provider_lower'] == provider_current].index data_train.loc[idx, 'replacing'] = match i = i + 1 if count_closest == count_current: i = i + 1 idx = data_train[data_train['provider_lower'] == provider_current].index data_train.loc[idx, 'replacing'] = match idx = data_train[data_train['provider_lower'] == match].index