Ejemplo n.º 1
0
Archivo: lev.py Proyecto: ixtel/LAAS
def lev(request):
    """Serve the Levenshtein distance between two GET parameters as JSON.

    Reads ``original_word`` and ``new_word`` from ``request.GET`` (each
    defaulting to an empty string). If both are non-empty the payload holds
    the distance computed by ``pylev.levenschtein``; otherwise it holds an
    error message explaining the required parameters.
    """
    params = request.GET
    original_word = params.get('original_word', '')
    new_word = params.get('new_word', '')

    if original_word and new_word:
        data = {
            'status': 'success',
            'distance': pylev.levenschtein(original_word, new_word),
        }
    else:
        # At least one of the two words is missing or empty.
        data = {
            'status': 'error',
            'message': "You must supply both 'original_word' & 'new_word' as GET params.",
            'description': """It's Levenshtein-As-A-Service. You know, for the lulz.

            Please direct all VC monies to Daniel Lindsley.""",
        }

    body = json.dumps(data)
    return Response(body, content_type='application/json')
Ejemplo n.º 2
0
    def traverseDir(self, root):
        """Walk ``root`` and yield the closest-named JPEG for each movie folder.

        A directory is processed when it contains a file whose extension
        equals one of ``mov_types``, does not contain ``folder.jpg``, and has
        at least one ``.jpg`` file. For such a directory the ``.jpg`` files
        are ranked by ``pylev.levenschtein`` distance to the movie file name,
        ``self.lv`` is refilled with them (closest first), ``self.loc.Text``
        is set to the directory, and ``"<dir>/<closest jpg>"`` is yielded.

        ``self.mov.Text`` is updated for every directory visited, including
        with ``None`` when the directory holds no movie file.
        """
        for dirName, subdirList, fileList in os.walk(root):
            # Only the first movie file found is used.  # 2 and more?
            mov = next(
                (
                    x
                    for x in fileList
                    if any(os.path.splitext(x)[1] == mov_type for mov_type in mov_types)
                ),
                None,
            )
            self.mov.Text = mov
            if mov is None:
                continue
            if "folder.jpg" in fileList:
                continue

            # Build a real list: on Python 3 ``filter`` returns an iterator,
            # which never compares equal to ``[]`` and cannot be indexed.
            jpgFiles = [name for name in fileList if os.path.splitext(name)[1] == ".jpg"]
            if not jpgFiles:
                continue

            ranked = sorted(jpgFiles, key=lambda name: pylev.levenschtein(name, mov))
            self.lv.Items.Clear()
            self.loc.Text = dirName
            # Explicit loop: a bare ``map`` is lazy on Python 3 and would
            # never add anything to the list view.
            for name in ranked:
                self.lv.Items.Add(name)
            yield "%s/%s" % (dirName, ranked[0])
Ejemplo n.º 3
0
    def traverseDir(self, root):
        """Walk ``root`` and yield the closest-named JPEG for each movie folder.

        A directory is processed when it contains a file whose extension
        equals one of ``mov_types``, does not contain ``folder.jpg``, and has
        at least one ``.jpg`` file. For such a directory the ``.jpg`` files
        are ranked by ``pylev.levenschtein`` distance to the movie file name,
        ``self.lv`` is refilled with them (closest first), ``self.loc.Text``
        is set to the directory, and ``'<dir>/<closest jpg>'`` is yielded.

        ``self.mov.Text`` is updated for every directory visited, including
        with ``None`` when the directory holds no movie file.
        """
        for dirName, subdirList, fileList in os.walk(root):
            # Only the first movie file found is used.  # 2 and more?
            mov = next(
                (
                    x
                    for x in fileList
                    if any(os.path.splitext(x)[1] == mov_type
                           for mov_type in mov_types)
                ),
                None,
            )
            self.mov.Text = mov
            if mov is None:
                continue
            if 'folder.jpg' in fileList:
                continue

            # Build a real list: on Python 3 ``filter`` returns an iterator,
            # which never compares equal to ``[]`` and cannot be indexed.
            jpgFiles = [
                name for name in fileList
                if os.path.splitext(name)[1] == '.jpg'
            ]
            if not jpgFiles:
                continue

            ranked = sorted(
                jpgFiles, key=lambda name: pylev.levenschtein(name, mov))
            self.lv.Items.Clear()
            self.loc.Text = dirName
            # Explicit loop: a bare ``map`` is lazy on Python 3 and would
            # never add anything to the list view.
            for name in ranked:
                self.lv.Items.Add(name)
            yield '%s/%s' % (dirName, ranked[0])
Ejemplo n.º 4
0
# For every row of data_train, look for a near-duplicate 'provider_lower'
# value and, when that near-duplicate has an equal or higher 'count', record
# it in the row's 'replacing' column.

# Incremented each time a 'replacing' value is written below.
i = 0
# Largest edit distance at which a close match is still accepted.
lev_max = 1
# Not used in the visible part of this script.
levenstein_distance = []
for idx, row in data_train.iterrows():
    provider_current = row['provider_lower']
    count_current = row['count']
    # Candidates: every 'provider_lower' value that differs from the current one.
    data_train_current = data_train[data_train['provider_lower'] !=
                                    row['provider_lower']]['provider_lower']
    # Ask difflib for at most one candidate (n=1) using a 0.6 cutoff.
    closest_word = difflib.get_close_matches(provider_current,
                                             data_train_current,
                                             n=1,
                                             cutoff=0.6)
    if len(closest_word) > 0:
        match = closest_word[0]
        distance = levenschtein(provider_current, match)
        if distance <= lev_max:
            # NOTE(review): .item() presumably needs exactly one row to match
            # here -- confirm 'provider_lower' values are unique in data_train.
            count_closest = data_train[data_train['provider_lower'] ==
                                       match]['count'].item()
            if count_closest > count_current:
                #print "replace " + provider_current +  " by " + match + " ( " + str(count_current) + " , " + str(count_closest) + ")"
                # Rebinds the loop variable `idx` to the index of all rows
                # holding the current provider name.
                idx = data_train[data_train['provider_lower'] ==
                                 provider_current].index
                data_train.loc[idx, 'replacing'] = match
                i = i + 1
            if count_closest == count_current:
                # Tie on count: the current provider is still marked as
                # replaced by the match.
                i = i + 1
                idx = data_train[data_train['provider_lower'] ==
                                 provider_current].index
                data_train.loc[idx, 'replacing'] = match
                # Index of the rows holding the matched name; how it is used
                # is not visible in this portion of the script.
                idx = data_train[data_train['provider_lower'] == match].index