def measure(self, stringset):
    """Compute summary measures for a collection of strings.

    The strings are wrapped in ``containers.LengthAscendingStrings`` and
    two pairwise arrays are derived from that container: the edit
    distances (``leven.edit_distance_array2d``) and the ``'distance'``
    entry of ``jaccard.jaccard_array2d``.

    Args:
        stringset: Collection of strings to measure; passed unchanged to
            ``containers.LengthAscendingStrings``.

    Returns:
        dict with the keys:
            ``'EditAvg'``     -- sum of the edit-distance array divided by
                                 ``size * (size - 1)``.
            ``'JaccDistAvg'`` -- sum of the Jaccard-distance array divided
                                 by ``size * (size - 1)``.
            ``'Size'``        -- ``len()`` of the edit-distance array.
        With fewer than two strings there are no pairs to average over,
        so both averages are reported as ``0.0``.
    """
    lass = containers.LengthAscendingStrings(stringset)
    edit_array2d = leven.edit_distance_array2d(lass)
    jaccdist_array2d = jaccard.jaccard_array2d(lass)['distance']

    size = len(edit_array2d)

    # Number of ordered pairs (i, j) with i != j.
    # NOTE(review): this denominator assumes both arrays are square
    # (size x size) with a zero diagonal -- confirm against leven/jaccard.
    pair_count = size * (size - 1)

    if pair_count > 0:
        avg_edit = edit_array2d.sum() / float(pair_count)
        avg_jaccdist = jaccdist_array2d.sum() / float(pair_count)
    else:
        # Zero or one string: avoid dividing by zero.
        avg_edit = 0.0
        avg_jaccdist = 0.0

    return {
        'EditAvg': avg_edit,
        'JaccDistAvg': avg_jaccdist,
        'Size': size,
    }
def __init__(self, strings, **kwargs):
    """Set up the containers and pairwise metric arrays for ``strings``.

    Args:
        strings: Input strings; handed to
            ``containers.LengthAscendingStrings``.
        **kwargs: Accepted for interface compatibility; not used here.

    Attributes set:
        lass: The ``LengthAscendingStrings`` wrapper around ``strings``.
        clusters: ``containers.Clusters`` built from ``lass``.
        editdist: Result of ``leven.edit_distance_array2d(lass)``.
        jaccarrays: Result of ``jaccard.jaccard_array2d(lass)``.
    """
    # Re-ordered view of the input, treated as read-only from here on.
    # (Original notes list a ``lenbounds`` OrderedDict property on it.)
    ordered_strings = containers.LengthAscendingStrings(strings)
    self.lass = ordered_strings

    # Cluster bookkeeping container. Original notes describe it as a
    # mapping of int to set, exposing objs, cids, unclustered_objs and
    # merge() -- see containers.Clusters for the authoritative interface.
    self.clusters = containers.Clusters(ordered_strings)

    # Pairwise edit-distance metrics for the ordered strings.
    self.editdist = leven.edit_distance_array2d(ordered_strings)

    # Jaccard / character-set related metrics for the ordered strings.
    self.jaccarrays = jaccard.jaccard_array2d(ordered_strings)