Ejemplo n.º 1
0
def update_clustroid(cluster):
    """Move the clustroid of ``cluster`` to its last position, in place.

    The clustroid is chosen as the element with the smallest sum of squared
    ``edit_dist`` distances to every other element of the cluster.

    Args:
        cluster: mutable list of elements accepted by ``edit_dist``.

    Returns:
        None. The chosen element is removed from its current position and
        appended, so it ends up at ``cluster[-1]``. Clusters with fewer than
        two elements are left untouched.
    """
    if len(cluster) < 2:
        # Nothing to choose between, and argmin has nothing to work on
        # for an empty list.
        return

    sum_of_squares = []
    for i, candidate in enumerate(cluster):
        # Score against *all* other elements. Summing only over the elements
        # that come after ``i`` gives the last element a score of 0, so it
        # would win no matter what the distances are.
        summ = 0
        for j, other in enumerate(cluster):
            if j != i:
                summ += pow(edit_dist(candidate, other), 2)
        sum_of_squares.append(summ)

    c_id = argmin(sum_of_squares)
    # Append first, then delete by the (still valid) original index.
    cluster.append(cluster[c_id])
    del cluster[c_id]
Ejemplo n.º 2
0
def set_clustroid_mate(listoclusters):
    """Place, in each cluster, the element closest to the clustroid at [-2].

    The clustroid is taken to be ``cluster[-1]``. Every other element is
    compared to it with ``edit_dist`` and the nearest one is swapped into
    position ``[-2]``. On ties the element with the lowest index wins.

    Args:
        listoclusters: iterable of mutable lists; each list is reordered in
            place. Clusters with fewer than two elements are skipped.

    Returns:
        None.
    """
    for cluster in listoclusters:
        if len(cluster) < 2:
            continue

        clustroid = cluster[-1]
        # The search state must be reset for every cluster: carrying the best
        # distance over from a previous cluster can leave the index stale
        # (or unset) and swap the wrong element.
        best_dist = float('inf')
        mate_idx = len(cluster) - 2  # default: leave [-2] where it is
        for i, candidate in enumerate(cluster[:-1]):
            dist = edit_dist(candidate, clustroid)
            if dist < best_dist:
                best_dist = dist
                mate_idx = i

        cluster[-2], cluster[mate_idx] = cluster[mate_idx], cluster[-2]
Ejemplo n.º 3
0
def clusterize(listoclusters):
    """Create clusters starting from a list of lists of single elements.

    Repeatedly finds the two clusters whose last elements (the position
    ``update_clustroid`` moves the clustroid to) are closest according to
    ``edit_dist``, merges the second into the first, and recomputes the
    clustroid of the merged cluster. Merging stops once the largest cluster
    reaches ``settings.CLUSTER_MAX_SIZE``, fewer than two clusters remain,
    or no mergeable pair is found. Finally ``set_clustroid_mate`` is applied
    to the resulting clusters.

    Args:
        listoclusters: mutable list of non-empty lists. It is modified in
            place: merged clusters grow and absorbed clusters are removed.

    Returns:
        None.
    """
    cluster_size = 1
    while cluster_size < settings.CLUSTER_MAX_SIZE and len(listoclusters) > 1:
        min_dist = float('inf')
        to_merge = None
        for i, left in enumerate(listoclusters):
            for j in range(i + 1, len(listoclusters)):
                dist = edit_dist(left[-1], listoclusters[j][-1])
                if dist < min_dist:
                    min_dist = dist
                    to_merge = (i, j)

        if to_merge is None:
            # No pair had a finite distance; stop instead of reusing a
            # stale pair or looping forever.
            break

        keep, absorb = to_merge
        listoclusters[keep].extend(listoclusters[absorb])
        # Drop the absorbed cluster so its elements are not duplicated and it
        # cannot be merged again. ``absorb > keep``, so ``keep`` stays valid.
        del listoclusters[absorb]
        update_clustroid(listoclusters[keep])
        cluster_size = max(len(members) for members in listoclusters)

    set_clustroid_mate(listoclusters)