Beispiel #1
0
    def create_occurence_file(self, filter=""):
        """Build the per-cluster occurrence matrix and an HTML fragment.

        :param filter: optional filter forwarded to getOccurenceCluster
        :return: DataFrame with one row per cluster plus one count column
                 per distinct item of the name column
        """
        print("Construction de la matrice d'occurence par cluster\n")
        code = ""
        rc = self.getOccurenceCluster(self.models, filter)
        for r in range(len(rc)):
            tools.progress(r, len(rc) - 1)
            code = code + "\n<h1>Cluster présent dans " + str(
                round(100 * rc["Occurence"][r])) + "% des algos</h1>"
            c = rc["Cluster"][r]
            code = code + c.print(self.ref_model.data, self.col_name) + "\n"
            code = code + "\n présent dans " + ",".join(rc["Model"][r]) + "\n"

        #print(tools.create_html("occurences", code, "http://f80.fr/cnrs"))

        dfOccurences = pd.DataFrame(
            data={"Cluster": rc["Cluster"], "Composition": rc["Composition"],
                  "Model": rc["Model"], "Algos": rc["Algos"],
                  "Occurence": rc["Occurence"]})
        # Series.get_values() was removed from pandas; .values is the
        # supported accessor.
        l_items = list(set(self.ref_model.data[self.col_name].values))

        for item in l_items:
            dfOccurences[item] = [0] * len(rc)
            print("\nTraitement de la mesure " + item)
            for i in range(len(rc)):
                tools.progress(i, len(rc))
                c = dfOccurences["Cluster"][i]
                # .at avoids the chained-assignment (SettingWithCopy) pattern
                # of dfOccurences[item][i] = ...
                dfOccurences.at[i, item] = c.labels.count(item)

        return dfOccurences
Beispiel #2
0
def pca_totrace(mod, ref_cluster, pca_offset=0):
    """Build the 3D point list and per-cluster hull facets for rendering.

    Measures are normalized and, when the model is not already
    3-dimensional, reduced by PCA to 3 + pca_offset components.

    :param mod: model exposing names(), mesures(), dimensions and clusters
    :param ref_cluster: per-point reference-cluster labels, indexed by point
    :param pca_offset: index of the first component used for the x axis
    :return: (li_data, facets) — one dict per point, one hull per cluster
    """
    labels = mod.names()
    mesures = tools.normalize(mod.mesures())

    if mod.dimensions != 3:
        # Bug fix: sklearn removed the private sklearn.decomposition.pca
        # module path; the public class is decomp.PCA.
        pca = decomp.PCA(n_components=3 + pca_offset)
        pca.fit(mesures)
        newdata = pca.transform(mesures)
    else:
        newdata = mesures.values

    li_data: list = []
    facets = []

    i = 0
    for c in mod.clusters:
        i = i + 1
        tools.progress(i, len(mod.clusters),
                       "Préparation des clusters pour rendu 3d")
        if len(c.clusters_distances) > 0:
            # Keep only the 10 nearest clusters, serialized as JSON for the
            # template.
            distances: pd.DataFrame = pd.DataFrame.from_dict(
                c.clusters_distances,
                orient="index",
                columns=["distance", "p1", "p2"])
            distances = distances.sort_values("distance")
            distances = distances[0:10]
            distances = distances.transpose()
            ss = distances.to_json()
        else:
            ss = "{}"

        facets.append(c.get_3dhull(newdata, pca_offset))

        for k in range(len(c.index)):
            ind = c.index[k]
            x = newdata[ind, pca_offset]
            y = newdata[ind, pca_offset + 1]
            z = newdata[ind, pca_offset + 2]
            sp = {
                'index': ind,
                'x': x,
                'y': y,
                'z': z,
                'style': c.color,
                'label': labels[ind],
                'name': labels[ind],
                'size': 1,
                'form': 'sphere',
                'cluster': c.name,
                'ref_cluster': ref_cluster[ind],
                'cluster_distance': ss
            }
            li_data.append(sp)

    return li_data, facets
Beispiel #3
0
    def create_trace(self, url="http://f80.fr/cnrs", name="best_", limit=10000, withPerf=False):
        """Render the 3D/2D trace of the top models and publish an HTML index.

        :param url: base url written into the generated pages
        :param name: file-name prefix (spaces are replaced by underscores)
        :param limit: maximum number of models to render
        :param withPerf: when True, append each model's performance report
        """
        print("\nTracés 3D et 2D des résultats.")
        name = name.replace(" ", "_")
        n_models = min(limit, len(self.models))
        parts = ["Calcul du " + str(datetime.datetime.now()) + "\n\n"]
        for rank in range(n_models):
            tools.progress(rank, n_models)
            parts.append("\nPosition " + str(rank + 1) + "<br>")
            parts.append(self.models[rank].trace("./saved", name + str(rank), url))
            if withPerf:
                parts.append(self.models[rank].print_perfs())

        tools.create_html("index_" + name, "".join(parts), url)
Beispiel #4
0
    def init_distance_cluster(self):
        """Compute and cache the minimal distance between every ordered pair
        of distinct clusters (results stored in each cluster's
        clusters_distances dict, keyed by the other cluster's name)."""
        values = self.mesures().values

        for pos, source in enumerate(self.clusters):
            tools.progress(pos, len(self.clusters))
            for target in self.clusters:
                if source == target:
                    continue
                # Skip pairs already computed on a previous call.
                if source.clusters_distances.get(target.name) is None:
                    source.clusters_distances[target.name] = list(
                        source.distance_min(target, values))
Beispiel #5
0
    def initByDistance(self, seuil=1):
        """Build the graph by linking every pair of points closer than `seuil`.

        :param seuil: distance threshold below which two nodes get an edge
        """
        self.init_distances()
        l_edges = []
        n = len(self.distances)
        for i in range(n):
            tools.progress(i, n, "Construction du graphe")

            # Bug fix: the original scanned all j, so distances[i,i] (== 0,
            # always < seuil) added a self-loop on every node and each pair
            # was examined twice. Start at i+1 instead.
            # NOTE(review): assumes the distance matrix is symmetric — confirm.
            for j in range(i + 1, n):
                if self.distances[i, j] < seuil:
                    l_edges.append([i, j])

        self.graph.add_edges_from(l_edges)
        d: dict = dict(zip(range(0, len(self.data)), self.data[self.name_col]))
        nx.set_node_attributes(self.graph, d, "label")
        nx.set_node_attributes(self.graph, self.data[self.measures_col], self.measures_col)
Beispiel #6
0
def trace_artefact_GL(mod,
                      id="",
                      title="",
                      ref_model=None,
                      pca_offset=0,
                      autorotate=False,
                      add_property=None):
    """Render the "modele.html" template with the 3D scene for a model.

    :param mod: model to display
    :param id: suffix appended to the rendering zone name
    :param title: page title
    :param ref_model: optional reference model whose hulls are also drawn
    :param pca_offset: index of the first PCA component used for the axes
    :param autorotate: camera autorotation flag passed to the template
    :param add_property: extra data columns attached to each rendered point
    :return: the rendered HTML code
    """
    # Bug fix: the original default add_property=[] was a shared mutable
    # default; None plus local initialization keeps calls independent.
    if add_property is None:
        add_property = []

    properties_dict: dict = create_dict_for_properties(mod.data, add_property)
    li_data, facets = pca_totrace(mod,
                                  mod.data['ref_cluster'],
                                  pca_offset=pca_offset)

    if len(add_property) > 0:
        for i in range(0, len(li_data)):
            tools.progress(i, len(li_data), "Ajout des propriétés")
            row = li_data[i]["index"]
            d: dict = properties_dict[row]
            li_data[i] = ({**li_data[i], **d})

    if ref_model is None or ref_model.clusters == mod.clusters:
        facets_ref = []
    else:
        tmp_li_data, facets_ref = pca_totrace(ref_model,
                                              ref_model.data['ref_cluster'],
                                              pca_offset=pca_offset)

    # Bug fix: DataFrame.ix was removed from pandas; .iloc keeps the
    # positional intent (first column = point names).
    d = pd.concat([mod.data.iloc[:, 0], mod.mesures()], axis=1, sort=False)

    toList = [list(line) for line in d.values]

    code = render_template(
        "modele.html",
        title=title,
        name_zone="zone" + id,
        datas=li_data,
        components=list(mod.mesures().columns),
        autorotate=str(autorotate).lower(),
        data_source=toList,
        facets_ref=facets_ref,
        facets=facets,
        edges=[],
        url_to_render="/static/rendering/render.html?offset=" +
        str(pca_offset))
    return code
Beispiel #7
0
    def __init__(self,data=None,url:str="",remote_addr:str="",algo_loc:str=""):
        """Build the object either from a remote graph file (url) or from a
        DataFrame (data); initializes the shared color palette on first use.

        :param data: optional DataFrame handed to the parent constructor
        :param url: optional location of a graph file to load
        :param remote_addr: caller address used to resolve the url
        :param algo_loc: extra locator forwarded to load()
        """
        # NOTE(review): assumes self.clusters already exists (class attribute
        # or parent default), since it is cleared before super().__init__
        # runs — confirm.
        self.clusters.clear()
        if draw.colors is None or len(draw.colors) < 2: draw.colors = draw.init_colors(200)

        if len(url)>0:
            tools.progress(0,100,"Chargement du graphe")
            url=tools.getUrlForFile(url,remote_addr)
            if not self.load(url,algo_loc):
                # load() returned False: the graph did not come from the
                # cache. If one was parsed anyway, cache it, then renumber
                # nodes 0..n-1 keeping the original ids in the "name"
                # attribute.
                if not self.graph is None:
                    self.save()
                    self.graph = nx.convert_node_labels_to_integers(self.graph, label_attribute="name")

            if not self.graph is None:
                tools.progress(90,100,"Préparation")
                # One DataFrame row per node, aligned with the node labels.
                self.data: pd.DataFrame = pd.DataFrame(index=list(range(0,len(self.graph.nodes))))
                self.data["name"]=list(self.graph.nodes.keys())
                self.dimensions = 3
                self.name_col = "name"

        if not data is None:
            super().__init__(data=data)
            self.graph=nx.Graph()
Beispiel #8
0
    def init_metrics(self,showProgress=False):
        """Score every model against the reference clustering, sort the
        models by score (best first) and collect their metrics.

        :param showProgress: when True, display a progress bar per pass
        :return: the concatenated textual performance reports
        """
        rc=""
        self.metrics: pd.DataFrame = pd.DataFrame()
        print("Calcul des métriques")
        print("\nPremière passe")
        true_labels=self.ref_model.cluster_toarray()

        for i in range(len(self.models)):
            if showProgress:tools.progress(i, len(self.models))
            m:algo.model=self.models[i]
            m.init_metrics(true_labels)

        print("Tri des "+str(len(self.models))+" modeles")
        self.models.sort(key=lambda x: x.score, reverse=True)

        print("\n2eme passe")
        frames = []
        for i in range(len(self.models)):
            if showProgress:tools.progress(i, len(self.models))
            m = self.models[i]
            frames.append(m.toDataframe(true_labels))
            rc=rc+m.print_perfs()

        # Bug fix: DataFrame.append was removed from pandas (2.0); collect
        # the per-model frames and concat them once.
        if frames:
            self.metrics = pd.concat(frames)

        return rc
Beispiel #9
0
    def findClusters(self,prefixe="cl_",method="gn",k=5,iter=15):
        """Detect communities in self.graph and store them as clusters.

        :param prefixe: name prefix for the created clusters
        :param method: "gn" (Girvan-Newman), "lab" (label propagation),
                       "mod" (greedy modularity) or "async" (fluid)
        :param k: community count, used by the "async" method only
        :param iter: max iterations, used by the "async" method only
        """
        # Single cache key shared by load and save.
        cache_key = self.url + "_" + method + str(k) + str(iter)
        if not self.load_cluster(cache_key):
            tools.progress(0, 100, "Recherche des communautés avec "+method)

            # Fallback: a single community holding every node.
            comm=[set(range(0,len(self.graph.nodes)))]

            if method.startswith("gn"):
                tmp=nx.algorithms.community.girvan_newman(self.graph)
                comm=tuple(sorted(c) for c in next(tmp))

            if method.startswith("lab"):
                comm=nx.algorithms.community.label_propagation_communities(self.graph)

            if method.startswith("mod"):
                comm=nx.algorithms.community.greedy_modularity_communities(self.graph)

            if method.startswith("async"):
                try:
                    comm = nx.algorithms.community.asyn_fluidc(self.graph,k=k,max_iter=iter)
                except Exception:
                    # Best effort: keep the single-community fallback.
                    # (Bare except also swallowed KeyboardInterrupt.)
                    tools.progress(100,100,"Impossible d'exécuter async_fluid")


            i=0
            for c in comm:
                cl=cluster(prefixe+str(i),index=list(c),color=draw.colors[i % len(draw.colors)])
                i=i+1
                tools.progress(i, 100, "Fabrication des clusters")
                self.clusters.append(cl)

            tools.progress(100, 100, "Clustering terminé")
            # Bug fix: the original saved under self.url+"_"+method (without
            # k and iter) while load_cluster used the full key, so a saved
            # result could never be loaded back.
            self.save_cluster(cache_key)

        else:
            tools.progress(100,100,"Chargement des clusters")
Beispiel #10
0
    def clusters_from_labels(self, labels: np.ndarray, colors, name="cl_"):
        """Group point indices by label and append one cluster per group.

        Entries labelled -1 are skipped (no cluster is created for them),
        but they still consume a color/position slot.

        :param labels: one label per data row
        :param colors: palette cycled over by cluster rank
        :param name: prefix for generated cluster names
        """
        grouped = dict()
        for position, label in enumerate(labels):
            grouped.setdefault(label, []).append(position)

        rank = 0
        for key in grouped.keys():
            rank = rank + 1
            tools.progress(rank, len(grouped), "Construction des clusters")
            chosen_color = colors[rank % len(colors)]
            if key != -1:
                built: cluster = cluster(name + str(rank),
                                         index=grouped[key],
                                         color=chosen_color,
                                         pos=rank)
                built.findBestName(self.data[self.name_col], "cl" + str(rank) + "_")
                self.clusters.append(built)
Beispiel #11
0
    def load(self,url,algo_loc=""):
        """Load a graph into self.graph from the cache, a graph file, or a
        distance matrix; also sets self.url (the cache key).

        :param url: location of the graph/data file
        :param algo_loc: extra locator mixed into the cache key
        :return: True when loaded from cache or from a distance matrix.
                 NOTE(review): a successful read_gml/read_gexf still falls
                 through to `return False`; the caller appears to rely on
                 that to trigger save() — confirm before changing.
        """
        # Cache key: hex of the base64 of url+algo_loc (filesystem-safe).
        self.url=bytes(base64.encodebytes(bytes(url+algo_loc,encoding="utf-8"))).hex()
        if os.path.exists("./clustering/"+self.url+".gpickle"):
            tools.progress(50,100,"Chargement depuis le cache")
            self.graph = nx.read_gpickle("./clustering/"+self.url+".gpickle")
            return True
        else:

            url=tools.dezip(url)

            if ".gml" in url or ".graphml" in url :
                tools.progress(50, 100, "Chargement du fichier au format GML")
                # Bug fix: bare `except:` also swallowed KeyboardInterrupt /
                # SystemExit; narrowed to Exception throughout.
                try:
                    self.graph =nx.read_gml(url)
                except Exception:
                    # GML parsing failed; fall back to GraphML.
                    try:
                        self.graph=nx.read_graphml(url)
                    except Exception:
                        return False


            if ".gexf" in url or ".gephi" in url:
                try:
                    self.graph = nx.read_gexf(url)
                except Exception:
                    print("Impossible de lire "+url)


            if self.graph is None:
                tools.progress(50, 100, "Chargement depuis la matrice de distance")
                try:
                    self.data: pd.DataFrame = tools.get_data_from_url(url,"")
                except Exception:
                    # Best effort: keep any previously set self.data.
                    pass
                if not self.data is None:
                    self.create_graph_from_dataframe()
                    return True

            return False
Beispiel #12
0
    def __init__(self,data:pd.DataFrame,no_metric=False,format:dict=None):
        """Wrap a measures DataFrame: resolve the column layout, convert
        string measure columns to numbers, and build the reference model.

        :param data: raw data; by convention column 0 holds the item names
                     and the following columns hold the measures
        :param no_metric: when True, skip the (costly) reference-metrics pass
        :param format: optional layout overrides with "name", "measures",
                       "properties" and "cluster" keys
        """
        # Bug fix: the original default `format:dict=dict()` was a mutable
        # default that this method mutates, so layout choices leaked from one
        # construction to the next.
        if format is None:
            format = dict()

        if draw.colors is None or len(draw.colors) < 2: draw.colors = draw.init_colors(200)
        self.data = data

        # Parameter defaults
        if not "name" in format:
            format["name"]=[data.columns[0]]# Item labels come from the first column

        if not "measures" in format:
            format["measures"]=data.columns[range(1,len(data.columns.values))]

        if not "properties" in format:
            format["properties"]=[]

        self.col_name = format["name"][0]
        self.col_measures=format["measures"]
        self.col_properties = format["properties"]

        i = 0
        for c in data[format["measures"]]:
            tools.progress(i, len(format["measures"]), "Conversion des chaines de caractères de " + c)
            i = i + 1
            if data[c].dtype == object and len(data[c]) > 0:
                l_values=set(data[c])
                items=dict(zip(l_values,[0]*len(l_values)))
                # NOTE(review): label 1 is used as the reference value —
                # assumes index label 1 exists in the data; confirm.
                ref=data[c][1]

                if tools.getComplexity(data[c])>90:
                    # High-entropy text: replace the column by token codes.
                    # NOTE(review): data.columns[i] only equals c when the
                    # measures occupy columns 1..n in order — confirm.
                    data[c]=tools.tokenize(data[data.columns[i]])
                else:
                    # Encode each distinct value by its Levenshtein distance
                    # to the reference value.
                    for item in items.keys():
                        d=stringdist.levenshtein(item, ref)
                        items[item]=d

                    if len(items)<100:
                        # Explicit lists: Series.replace expects list-likes,
                        # not dict views.
                        data[c]=data[c].replace(list(items.keys()),list(items.values()))
                    else:
                        l=[]
                        for k in data[c]:
                            l.append(items[k])

                        data[c]=l



        if not "cluster" in list(self.data.columns):
            if not "cluster" in format:
                format["cluster"]=[self.col_name]

            self.data["ref_cluster"] = self.create_ref_cluster_from_name(self.data, format["cluster"][0])

        self.dimensions = len(format["measures"])  # The components are the following columns

        self.ref_model: algo.model = self.init_reference_model()
        if not no_metric:
            self.ref_model.init_metrics(self.ref_model.cluster_toarray())
Beispiel #13
0
 def save(self,path=""):
     """Persist self.graph as a gpickle file.

     :param path: target file; defaults to the cache path derived from
                  self.url. The ".gpickle" suffix is appended if missing.
     """
     tools.progress(0, 100, "Enregistrement du fichier dans le cache")
     target = path if len(path) > 0 else "./clustering/" + self.url + ".gpickle"
     if not target.endswith(".gpickle"):
         target = target + ".gpickle"
     nx.write_gpickle(self.graph, target)
Beispiel #14
0
    def node_treatments(self):
        """Compute standard node metrics (degree centrality, betweenness,
        closeness, pagerank, hub/authority) and attach them as node
        attributes on self.graph, skipping any metric already present."""
        G=self.graph
        tools.progress(0,100,"Degree de centralité")
        if len(nx.get_node_attributes(G,"centrality"))==0:
            nx.set_node_attributes(G,nx.degree_centrality(G),"centrality")

        tools.progress(20, 100, "Degree de betweeness")
        if len(nx.get_node_attributes(G, "betweenness")) == 0:
            nx.set_node_attributes(G, nx.betweenness_centrality(G), "betweenness")

        tools.progress(40, 100, "Degree de closeness")
        if len(nx.get_node_attributes(G, "closeness")) == 0:
            nx.set_node_attributes(G, nx.closeness_centrality(G), "closeness")

        tools.progress(60, 100, "Page rank")
        # Bug fix: bare `except:` also swallowed KeyboardInterrupt /
        # SystemExit; narrowed to Exception (pagerank/hits may fail to
        # converge — these metrics are treated as optional).
        try:
            if len(nx.get_node_attributes(G, "pagerank")) == 0:
                nx.set_node_attributes(G, nx.pagerank(G), "pagerank")
        except Exception:
            pass

        tools.progress(80, 100, "Hub and autorities")
        try:
            if len(nx.get_node_attributes(G, "hub")) == 0:
                hub, aut = nx.hits(G)
                nx.set_node_attributes(G, hub, "hub")
                nx.set_node_attributes(G, aut, "autority")
        except Exception:
            pass

        self.node_treatment=True
        tools.progress(100, 100, "Fin des traitements")
Beispiel #15
0
    def init_metrics(self, labels_true):
        """Initialize per-cluster metrics, then the model-level clustering
        scores (silhouette, adjusted Rand, homogeneity, completeness,
        V-measure) and the aggregate self.score.

        Models with 2 clusters or fewer get all scores zeroed (silhouette
        is undefined there).

        :param labels_true: ground-truth labels aligned with the data rows
        :return: the formatted performance report from print_perfs()
        """
        mes = self.mesures()
        i = 0
        for c in self.clusters:
            tools.progress(i, len(self.clusters),
                           "Initialisation des metrics des clusters")
            c.init_metrics(mes)
            i = i + 1

        if len(self.clusters) > 2:
            labels = self.cluster_toarray()
            tools.progress(10, 100, "Score de silhouete")
            self.silhouette_score = metrics.silhouette_score(
                self.mesures(), labels)

            tools.progress(40, 100, "Rand Index")
            self.rand_index = metrics.adjusted_rand_score(labels_true, labels)

            tools.progress(50, 100, "Homogénéité")
            self.homogeneity_score = metrics.homogeneity_score(
                labels_true, labels)

            tools.progress(60, 100, "Completeness")
            self.completeness_score = metrics.completeness_score(
                labels_true, labels)

            tools.progress(70, 100, "V-mesure")
            self.v_measure_score = metrics.v_measure_score(labels_true, labels)

            # Weighted aggregate rescaled to a 0-20 style grade, rounded to
            # 2 decimals.
            self.score = (
                (self.silhouette_score + 1) +
                (self.rand_index + 1) * 1.5 + self.v_measure_score) / 6
            self.score = round(self.score * 20 * 100) / 100

            tools.progress(100, 100, "Calcul metriques terminé")
            # NOTE(review): the original called init_distance_cluster() here
            # under `len(self.clusters) < 3`, which is always False inside
            # this `> 2` branch — provably dead code, removed. If
            # inter-cluster distances are wanted for small models, the call
            # likely belongs in the else branch below; confirm intent.

        else:
            self.silhouette_score = 0
            self.score = 0
            self.rand_index = 0
            self.homogeneity_score = 0
            self.completeness_score = 0
            self.v_measure_score = 0

        return self.print_perfs()