    target = False
    force = False
    
    file_template = '../results/bm_{set}_swfc_%d.csv'%NC
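    # %d is substituted with NC immediately; the {set} placeholder is filled
    # later via str.format for each of the three result files below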

    best_centroids = np.loadtxt(file_template.format(set='centroids'),delimiter=",")
    best_weights = np.loadtxt(file_template.format(set='weights'),delimiter=",")
    best_u = np.loadtxt(file_template.format(set='u'),delimiter=",")

    clusters = np.argmax(best_u,axis=1) 
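    # hard labels: each sample is assigned to the cluster with the highest
    # fuzzy membership in best_u (one row per sample, one column per cluster)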


    N,ND = data.shape

    knn = 15
    
    #create neighbourhood
    print("building neighbourhood, locations.shape=",locations.shape)
    kdtree = cKDTree(locations)
    neighbourhood,distances = make_neighbourhood(kdtree,locations,knn,max_distance=2.0)
    distances = np.array(distances)
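    # neighbourhood[i] presumably holds the indices of up to knn neighbours of
    # sample i found within max_distance, with distances giving the separations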
    
    #spatial
    clusters_graph = np.int32(graph_cut(locations,neighbourhood,best_u,unary_constant=70.0,smooth_constant=15.0,verbose=1))
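    # assumed graph_cut semantics: the unary term scores each label by its fuzzy
    # membership and the pairwise term penalises label disagreement between
    # neighbours, so a larger smooth_constant gives spatially smoother clusters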
    
    #for i in range(N):
    #    print(i,clusters[i],best_u[i],clusters_graph[i],sep=',')
    
    np.savetxt("../results/final_bm_clusters_swfc_%d.csv"%NC,clusters_graph,delimiter=",",fmt="%d")
    knn = 8

    #create neighbourhood EW
    print("building neighbourhood, locations_ore.shape=", locations_ore.shape)
    kdtree = cKDTree(locations_ore)
    neighbourhood, distances = make_neighbourhood(kdtree,
                                                  locations_ore,
                                                  knn,
                                                  max_distance=np.inf)
    distances = np.array(distances)
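    # max_distance=np.inf keeps all knn neighbours regardless of separation,
    # unlike the 2.0 cutoff used for the first neighbourhood above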

    #spatial EW
    clusters_graph_ew = graph_cut(locations_ore,
                                  neighbourhood,
                                  best_u_ew,
                                  unary_constant=100.0,
                                  smooth_constant=80.0,
                                  verbose=1)

    #hard EW labels, for comparison against the graph-cut result
    clusters_ew = np.argmax(best_u_ew,axis=1)
    for i in range(len(locations_ore)):
        print(i,
              clusters_ew[i],
              best_u_ew[i],
              clusters_ew[neighbourhood[i]],
              clusters_graph_ew[i],
              sep=',')

    np.savetxt("../results/final_2d_clusters_sfcew_4.csv",
               clusters_graph_ew,
               delimiter=",",
               fmt="%.4f")

        centroid = np.asfortranarray(best_centroids, dtype=np.float32)
        weights = np.asfortranarray(best_weights, dtype=np.float32)
        clusters = np.asfortranarray(clusters, dtype=np.int8)

        ret_fc = cl.clustering.dbi_index(centroid, data, clusters, weights)
        ret_sill = cl.clustering.silhouette_index(data, clusters, weights)

        print("WFC: DB,Sil:", NC, ret_fc, ret_sill, sep=',')

        #Spatial correction
        clusters_graph = np.int32(
            graph_cut(locations,
                      neighbourhood,
                      best_u,
                      unary_constant=70.0,
                      smooth_constant=15.0,
                      verbose=0))
        centroids_F = np.asfortranarray(np.empty((NC, ND)), dtype=np.float32)

        #calculate centroids back
        for k in range(NC):
            indices = np.where(clusters_graph == k)[0]
            if len(indices) > 0:  #guard: np.mean over an empty cluster yields NaN
                centroids_F[k, :] = np.mean(data[indices, :], axis=0)

        clusters = np.asfortranarray(clusters_graph, dtype=np.int8)
        ret_swfc_dbi = cl.clustering.dbi_index(centroids_F, data, clusters,
                                               weights)
        ret_swfc_sill = cl.clustering.silhouette_index(data, clusters, weights)
        print("SWFC: DB,Sil:", NC, ret_swfc_dbi, ret_swfc_sill, sep=',')
Example #4
def swc(data, locations, ndim, nclusters, distance_function_weighted,
        adj_set,
        alpha=1.2,
        lambda_value=0.2,
        inc_eta=0.1,
        max_iterations=50,
        verbose=False,
        apply_spatial_step=False,
        apply_spatial_end=False,
        full_stats=False):
    #applies the spatial constraint at every step (apply_spatial_step)
    #and/or once at the end (apply_spatial_end)
    n,p = data.shape

    logging.info("Spatial Weighted clustering")    
    logging.info("clustres: %d",nclusters)    
    logging.info("data shape: %dx%d",n,p)    
    logging.info("alpha: %f",alpha)    
    logging.info("lambda_value: %f",lambda_value)    


    #calculate the dissimilarity matrix once, unweighted (the per-cluster
    #feature weights are initialised below)
    logging.info("Calculating dissimilarity matrix...")
    dissimilarity = distance_function_weighted.dissimilarity(data,data,debug=False)
    logging.info("Calculating dissimilarity matrix. DONE")
    
    #np.savetxt("../outputs/diss_targets.csv",dissimilarity[:,:,4])
    #quit()

    #initial weights
    weights = np.ones((nclusters,ndim)) / ndim
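    #every cluster starts with equal feature weights, so each row sums to 1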
    
    #initial partition is random
    u=None

    iterations = 0
    eta = lambda_value
    
    init_u = None
    
    #outer loop updates eta
    while True:
        #inner loop iterates until the feature weights stabilise
        #logging.info("current iteration: %d",iterations)
        weights_old = weights.copy()
        #set new weights
        #distance_function_weighted.set_weights(weights)
        #distance_function_weighted.set_lambda(eta)
        
        distance_function_tmp = lambda x,y: distance_function_weighted.distance_centroids(x,y,weights=weights)


        prototype, u, u0, d, jm, n_iter, fpc = fuzzy_cmeans(data, nclusters, alpha, distance_function_tmp,init=init_u,verbose=verbose)
        init_u = u.copy()
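        # the return tuple appears to follow the scikit-fuzzy cmeans convention:
        # centroids, final and initial memberships, distance matrix, objective
        # history jm, iteration count and the fuzzy partition coefficient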

        logging.info("fuzzy cmeans: inital jm=%f, last jm=%f, fpc=%f",jm[0],jm[-1],fpc)

        stat = {
                "step":iterations,
                "eta":eta,
                "iterations":p,
                "jm":jm[-1],
                "fpc":fpc,
            }

        #clusters
        clusters = np.argmax(u,axis=1)
        clusters_dict = create_clusters_dict(clusters)
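        #create_clusters_dict presumably maps each cluster label to the indices
        #of its member samples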
        
        
        print('prototype',iterations,prototype)
        #np.savetxt("../outputs/u-{0}.csv".format(iterations),u)
        #quit()

        #fuzzy dispersion
        #skl = fuzzy_dispersion_loop(dissimilarity,u**alpha)
        #print skl
        
        #skl = dispersion_loop(dissimilarity,nclusters,clusters_dict)
        #print skl
        
       
        #fuzzy score
        #try:
        #except Exception as e:
        #    print e
        #    fuzzy_score = -666
            
        #logging.info("fuzzy score: %f",fuzzy_score)


        #report
        #np.savetxt("../outputs/D.csv",D)
        
        #try:
        #D = distance_function_weighted.distance_max(data,data,nclusters,clusters,weights)
        #score = silloutte(D,clusters,nclusters)
        #except Exception as e:
        #    print e
        #    score = -666


        #vpc = partition_coefficient(u)
        #vpe = partition_entropy(u)
        compact = 0.0 #compactness(nclusters,clusters,adj_set)

        if full_stats:
            centroids = prototype.T
            
            score = silhouette_score(data,clusters)

            score_vpc = vpc(u)
            score_mvpc = mvpc(u,pre_vpc=score_vpc)
            score_vpe = vpe(u)
            score_vavcd = vavcd(data,u,alpha,centroids)
            score_vfs = vfs(data,u,alpha,centroids)
            score_vxb = vxb(data,u,alpha,centroids)
            score_vmcd = vmcd(centroids)

            stat["score_vpc"] = score_vpc
            stat["score_mvpc"] = score_mvpc
            stat["score_vpe"] = score_vpe
            stat["score_vavcd"] = score_vavcd
            stat["score_vfs"] = score_vfs
            stat["score_vxb"] = score_vxb
            stat["score_vmcd"] = score_vmcd
                
        logging.info("stats before spatial=%s",stat)
        
        
        #update weights
        if apply_spatial_step:
            #spatial
            clusters_graph = graph_cut(locations,adj_set,u)
            clusters_graph_dict = create_clusters_dict(clusters_graph)
            clusters = clusters_graph.copy()
            weights = calculate_weights(dissimilarity,nclusters,clusters_graph_dict,lambda_value)
        else:
            weights = calculate_weights_fuzzy(dissimilarity,u**alpha,lambda_value,debug=False)
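        #either branch leaves one weight per (cluster, feature) pair in weights;
        #lambda_value presumably controls how evenly weight spreads across features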

        for i in range(nclusters):
            logging.debug("weights[cluster=%d]=%s",i,weights[i,:])
        #print weights.shape,weights_old.shape
        
        #print "weights",weights
        
        #np.testing.assert_allclose(np.sum(weights,axis=1),1.0)

        iterations += 1
        eta = eta + inc_eta*lambda_value
        diff = np.sum((weights - weights_old)**2)
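        #squared Frobenius norm of the weight change, used as the convergence
        #test below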
        
        logging.info("diff weights=%f",diff)
        #logging.info("weights=%s",weights)
        #stop when the iteration cap is reached or the weights have converged
        if iterations >= max_iterations or diff < 1e-3:
            break
        
    #stats += [stat]        

    if apply_spatial_end:
        #spatial at end
        clusters_graph = graph_cut(locations,adj_set,u)
    else:
        clusters_graph = None

    return clusters,prototype,u,weights,stat,clusters_graph
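
#Hypothetical usage sketch (not in the original source). It assumes a distance
#object exposing the two methods swc() relies on, plus a precomputed adjacency
#set; WeightedDistance and build_adjacency are stand-in names, and the argument
#values below are illustrative only.
#
#  dist_fn = WeightedDistance()
#  adj_set = build_adjacency(locations)
#  clusters, prototype, u, weights, stat, clusters_graph = swc(
#      data, locations,
#      ndim=data.shape[1],
#      nclusters=4,
#      distance_function_weighted=dist_fn,
#      adj_set=adj_set,
#      alpha=1.2,
#      lambda_value=0.2,
#      apply_spatial_end=True)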
            #save data
            new_data = np.c_[locations, clusters]

            #np.savetxt(filename_template.format(tag='clusters',nc=NC),new_data,delimiter=",",fmt="%.4f")
            #np.savetxt(filename_template.format(tag='centroids',nc=NC),current_centroids,delimiter=",",fmt="%.4f")
            #np.savetxt(filename_template.format(tag='u',nc=NC),best_u,delimiter=",",fmt="%.4f")
            #np.savetxt(filename_template.format(tag='weights',nc=NC),best_weights,delimiter=",",fmt="%.4f")

            if abs(best_energy_centroids - best_energy_weights) < 1e-2:
                break

        #spatial correction
        clusters_graph = graph_cut(locations,
                                   neighbourhood,
                                   best_u,
                                   unary_constant=100.0,
                                   smooth_constant=30.0,
                                   verbose=0)

        centroid = np.asfortranarray(best_centroids, dtype=np.float32)
        weights = np.asfortranarray(best_weights, dtype=np.float32)
        clusters = np.asfortranarray(clusters, dtype=np.int8)

        ret_fc = cl.clustering.dbi_index(centroid, data, clusters, weights)
        ret_sill = cl.clustering.silhouette_index(data, clusters, weights)

        clusters = np.asfortranarray(clusters_graph, dtype=np.int8)
        ret_fc_sc = cl.clustering.dbi_index(centroid, data, clusters, weights)
        ret_sill_sc = cl.clustering.silhouette_index(data, clusters, weights)
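        # comparing (ret_fc, ret_sill) against (ret_fc_sc, ret_sill_sc) shows how
        # the spatial correction changes the DBI and silhouette scores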

        print("DB Index:",