def distMatrix(clusters):
    """
    Calculate the all-pairs distance matrix between the given clusters.

    Only one direction of each pair is stored: with the cluster IDs taken in
    ascending order, dist[i][j] exists only when j sorts after i. The row of
    the largest ID is therefore an empty dict.

    @type clusters: dict
    @param clusters: The clusters to compare, keyed on cluster ID. The IDs
        must be mutually sortable; they do not have to be contiguous or to
        start at zero.
    @rtype: tuple
    @return: (dist, minpair), where dist is the nested distance dict and
        minpair is the (i, j) pair with the overall minimum distance (the
        first one encountered on ties). minpair is None when there are
        fewer than two clusters.
    """
    # sorted() consumes the dict directly, so this does not depend on
    # keys() returning a list (it is a view on Python 3).
    ids = sorted(clusters)
    dist = {}
    best = None
    minpair = None
    for pos, i in enumerate(ids):
        row = dist[i] = {}
        # Pair i with the IDs that follow it by position in the sorted
        # order, not by ID value, so gaps in the ID range are harmless.
        for j in ids[pos + 1:]:
            d = util.nonSymmetricClusterDistance(clusters[i], clusters[j])
            row[j] = d
            if best is None or d < best:
                best = d
                minpair = (i, j)
    return dist, minpair
def updateDistMatrix(clusters, matrix, remIDs, newID):
    """
    Selectively update the distance calculations by deleting the removed
    clusters and adding calculations for the replacement cluster.

    The matrix is modified in place and also returned.

    @type clusters: dict
    @param clusters: The current clusters, keyed on ID. Every ID other than
        newID must already have a row in matrix.
    @type matrix: dict
    @param matrix: A dict containing one dict for each cluster, keyed on the
        cluster ID with the distance as the value.
    @type remIDs: tuple
    @param remIDs: The IDs of the removed clusters. Each must currently have
        a row in matrix, otherwise a KeyError is raised.
    @type newID: int
    @param newID: The ID of the replacement cluster.
    @rtype: tuple
    @return: (matrix, minpair), where minpair is the (i, j) pair with the
        overall minimum distance (the first one encountered on ties), or
        None when the matrix holds no distances at all.
    """
    # Remove the rows of the clusters that were merged away.
    for rem in remIDs:
        del matrix[rem]

    # sorted() consumes the dict directly, so this does not depend on
    # keys() returning a list (it is a view on Python 3).
    for i in sorted(clusters):
        if i == newID:
            continue
        row = matrix[i]
        # Remove the columns of the clusters that were merged away.
        for rem in remIDs:
            row.pop(rem, None)
        # Calculate the distance to the new cluster.
        row[newID] = util.nonSymmetricClusterDistance(clusters[i], clusters[newID])

    # Find the overall minimum pair. Scanning explicitly (instead of nesting
    # min() calls) tolerates empty rows and an empty matrix, yielding None
    # rather than raising ValueError when no pair remains.
    best = None
    minpair = None
    for i, row in matrix.items():
        for j, d in row.items():
            if minpair is None or d < best:
                best = d
                minpair = (i, j)

    # Add an empty distance dict for the new cluster, for later updates.
    matrix[newID] = {}
    return matrix, minpair