Esempio n. 1
0
def printDiffs(wordcount, clusters):
    """Print how often each diff element occurs between top cluster words.

    For every cluster holding more than one word, the words are ranked by
    ``wordcount.get`` (highest first) and the two top-ranked words are passed
    to ``StringDistance.pythonDiff(..., path=True)``. Every element of the
    returned iterable is tallied, and the tallies are printed one per line
    as the element and its count separated by a tab.

    Args:
        wordcount: Mapping whose ``get`` method supplies the sort key for
            each word.
        clusters: Mapping whose values are iterables of clusters; each
            cluster is a sized collection of words.
    """
    diffCounts = {}
    for clusterSet in clusters.values():
        for cluster in clusterSet:
            # A single-word cluster has no second word to compare against.
            if len(cluster) <= 1:
                continue
            ranked = sorted(cluster, key=wordcount.get, reverse=True)
            # NOTE(review): assumes pythonDiff yields hashable items, since
            # they are used as dict keys below -- confirm against
            # StringDistance.
            diffs = StringDistance.pythonDiff(ranked[0], ranked[1], path=True)
            for diff in diffs:
                diffCounts[diff] = diffCounts.get(diff, 0) + 1
    # items() rather than iteritems(): the latter does not exist on Python 3
    # dicts, and print(..., sep=...) already requires the print function.
    for diff, ct in diffCounts.items():
        print(diff, ct, sep='\t')
Esempio n. 2
0
def printEdits(wordcount, clusters):
    """Print how often each edit occurs between top cluster words.

    For every cluster holding more than one word, the words are ranked by
    ``wordcount.get`` (highest first) and the two top-ranked words are passed
    to ``StringDistance.levenshtein(..., path=True)``. Every element of the
    first item of that result is tallied, and the tallies are printed one
    per line as the edit and its count separated by a tab.

    Args:
        wordcount: Mapping whose ``get`` method supplies the sort key for
            each word.
        clusters: Mapping whose values are iterables of clusters; each
            cluster is a sized collection of words.
    """
    editCounts = {}
    for clusterSet in clusters.values():
        for cluster in clusterSet:
            # A single-word cluster has no second word to compare against.
            if len(cluster) <= 1:
                continue
            ranked = sorted(cluster, key=wordcount.get, reverse=True)
            edits = StringDistance.levenshtein(ranked[0], ranked[1], path=True)
            # NOTE(review): presumably edits[0] is the edit path when
            # path=True, with hashable entries -- confirm against
            # StringDistance.
            for edit in edits[0]:
                editCounts[edit] = editCounts.get(edit, 0) + 1
    # items() rather than iteritems(): the latter does not exist on Python 3
    # dicts, and print(..., sep=...) already requires the print function.
    for edit, ct in editCounts.items():
        print(edit, ct, sep='\t')