Python SequenceMatcher Exemples, compat_difflib.SequenceMatcher Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : compat_ndiff.py Projet : yudaka/viewvc

def fcompare(f1name, f2name):
    f1 = fopen(f1name)
    f2 = fopen(f2name)
    if not f1 or not f2:
        return 0

    a = f1.readlines()
    f1.close()
    b = f2.readlines()
    f2.close()

    cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if tag == 'replace':
            fancy_replace(a, alo, ahi, b, blo, bhi)
        elif tag == 'delete':
            dump('-', a, alo, ahi)
        elif tag == 'insert':
            dump('+', b, blo, bhi)
        elif tag == 'equal':
            dump(' ', a, alo, ahi)
        else:
            raise ValueError, 'unknown tag ' + ` tag `

    return 1

Exemple #2

0

Afficher le fichier

Fichier : compat_ndiff.py Projet : manuvaldi/viewvc-wiki

def fcompare(f1name, f2name):
    f1 = fopen(f1name)
    f2 = fopen(f2name)
    if not f1 or not f2:
        return 0

    a = f1.readlines(); f1.close()
    b = f2.readlines(); f2.close()

    cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if tag == 'replace':
            fancy_replace(a, alo, ahi, b, blo, bhi)
        elif tag == 'delete':
            dump('-', a, alo, ahi)
        elif tag == 'insert':
            dump('+', b, blo, bhi)
        elif tag == 'equal':
            dump(' ', a, alo, ahi)
        else:
            raise ValueError, 'unknown tag ' + `tag`

    return 1

Exemple #3

0

Afficher le fichier

Fichier : compat_ndiff.py Projet : manuvaldi/viewvc-wiki

def fancy_replace(a, alo, ahi, b, blo, bhi):
    if TRACE:
        print '*** fancy_replace', alo, ahi, blo, bhi
        dump('>', a, alo, ahi)
        dump('<', b, blo, bhi)

    # don't synch up unless the lines have a similarity score of at
    # least cutoff; best_ratio tracks the best score seen so far
    best_ratio, cutoff = 0.74, 0.75
    cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
    eqi, eqj = None, None   # 1st indices of equal lines (if any)

    # search for the pair that matches best without being identical
    # (identical lines must be junk lines, & we don't want to synch up
    # on junk -- unless we have to)
    for j in xrange(blo, bhi):
        bj = b[j]
        cruncher.set_seq2(bj)
        for i in xrange(alo, ahi):
            ai = a[i]
            if ai == bj:
                if eqi is None:
                    eqi, eqj = i, j
                continue
            cruncher.set_seq1(ai)
            # computing similarity is expensive, so use the quick
            # upper bounds first -- have seen this speed up messy
            # compares by a factor of 3.
            # note that ratio() is only expensive to compute the first
            # time it's called on a sequence pair; the expensive part
            # of the computation is cached by cruncher
            if cruncher.real_quick_ratio() > best_ratio and \
                  cruncher.quick_ratio() > best_ratio and \
                  cruncher.ratio() > best_ratio:
                best_ratio, best_i, best_j = cruncher.ratio(), i, j
    if best_ratio < cutoff:
        # no non-identical "pretty close" pair
        if eqi is None:
            # no identical pair either -- treat it as a straight replace
            plain_replace(a, alo, ahi, b, blo, bhi)
            return
        # no close pair, but an identical pair -- synch up on that
        best_i, best_j, best_ratio = eqi, eqj, 1.0
    else:
        # there's a close pair, so forget the identical pair (if any)
        eqi = None

    # a[best_i] very similar to b[best_j]; eqi is None iff they're not
    # identical
    if TRACE:
        print '*** best_ratio', best_ratio, best_i, best_j
        dump('>', a, best_i, best_i+1)
        dump('<', b, best_j, best_j+1)

    # pump out diffs from before the synch point
    fancy_helper(a, alo, best_i, b, blo, best_j)

    # do intraline marking on the synch pair
    aelt, belt = a[best_i], b[best_j]
    if eqi is None:
        # pump out a '-', '?', '+', '?' quad for the synched lines
        atags = btags = ""
        cruncher.set_seqs(aelt, belt)
        for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
            la, lb = ai2 - ai1, bj2 - bj1
            if tag == 'replace':
                atags = atags + '^' * la
                btags = btags + '^' * lb
            elif tag == 'delete':
                atags = atags + '-' * la
            elif tag == 'insert':
                btags = btags + '+' * lb
            elif tag == 'equal':
                atags = atags + ' ' * la
                btags = btags + ' ' * lb
            else:
                raise ValueError, 'unknown tag ' + `tag`
        printq(aelt, belt, atags, btags)
    else:
        # the synch pair is identical
        print ' ', aelt,

    # pump out diffs from after the synch point
    fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)

Exemple #4

0

Afficher le fichier

Fichier : compat_ndiff.py Projet : yudaka/viewvc

def fancy_replace(a, alo, ahi, b, blo, bhi):
    if TRACE:
        print '*** fancy_replace', alo, ahi, blo, bhi
        dump('>', a, alo, ahi)
        dump('<', b, blo, bhi)

    # don't synch up unless the lines have a similarity score of at
    # least cutoff; best_ratio tracks the best score seen so far
    best_ratio, cutoff = 0.74, 0.75
    cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
    eqi, eqj = None, None  # 1st indices of equal lines (if any)

    # search for the pair that matches best without being identical
    # (identical lines must be junk lines, & we don't want to synch up
    # on junk -- unless we have to)
    for j in xrange(blo, bhi):
        bj = b[j]
        cruncher.set_seq2(bj)
        for i in xrange(alo, ahi):
            ai = a[i]
            if ai == bj:
                if eqi is None:
                    eqi, eqj = i, j
                continue
            cruncher.set_seq1(ai)
            # computing similarity is expensive, so use the quick
            # upper bounds first -- have seen this speed up messy
            # compares by a factor of 3.
            # note that ratio() is only expensive to compute the first
            # time it's called on a sequence pair; the expensive part
            # of the computation is cached by cruncher
            if cruncher.real_quick_ratio() > best_ratio and \
                  cruncher.quick_ratio() > best_ratio and \
                  cruncher.ratio() > best_ratio:
                best_ratio, best_i, best_j = cruncher.ratio(), i, j
    if best_ratio < cutoff:
        # no non-identical "pretty close" pair
        if eqi is None:
            # no identical pair either -- treat it as a straight replace
            plain_replace(a, alo, ahi, b, blo, bhi)
            return
        # no close pair, but an identical pair -- synch up on that
        best_i, best_j, best_ratio = eqi, eqj, 1.0
    else:
        # there's a close pair, so forget the identical pair (if any)
        eqi = None

    # a[best_i] very similar to b[best_j]; eqi is None iff they're not
    # identical
    if TRACE:
        print '*** best_ratio', best_ratio, best_i, best_j
        dump('>', a, best_i, best_i + 1)
        dump('<', b, best_j, best_j + 1)

    # pump out diffs from before the synch point
    fancy_helper(a, alo, best_i, b, blo, best_j)

    # do intraline marking on the synch pair
    aelt, belt = a[best_i], b[best_j]
    if eqi is None:
        # pump out a '-', '?', '+', '?' quad for the synched lines
        atags = btags = ""
        cruncher.set_seqs(aelt, belt)
        for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
            la, lb = ai2 - ai1, bj2 - bj1
            if tag == 'replace':
                atags = atags + '^' * la
                btags = btags + '^' * lb
            elif tag == 'delete':
                atags = atags + '-' * la
            elif tag == 'insert':
                btags = btags + '+' * lb
            elif tag == 'equal':
                atags = atags + ' ' * la
                btags = btags + ' ' * lb
            else:
                raise ValueError, 'unknown tag ' + ` tag `
        printq(aelt, belt, atags, btags)
    else:
        # the synch pair is identical
        print ' ', aelt,

    # pump out diffs from after the synch point
    fancy_helper(a, best_i + 1, ahi, b, best_j + 1, bhi)