def correlation_by_shapelets_grandeur(args):
    """
    Compute the correlation matrix of one quantity (args["grandeur"]) from
    its shapelet decomposition.

    args keys used: "chemin_datasets", "grandeur", "fenetre_slicing",
    "date_debut", "date_fin", "min_len", "max_len", "chemin_matrices",
    and optionally "modifCorrel".

    Returns
    -------
    dico_gp : dict
        (arete1, arete2) -> |pearson distance| for each column pair.
    matE_gp : pd.DataFrame
        symmetric matrix of the same values, NaN filled with 0; also written
        to chemin_matrices as "matrice_adjacence_proba_<grandeur>.csv".
    """
    df_gr_norm,cols = transformed_dataset(args["chemin_datasets"], args["grandeur"], \
                                          args["fenetre_slicing"], args["date_debut"],\
                                          args["date_fin"]);
    logging.debug("correlation_by_shapelet_grandeur : %s columns = %s",
                  args["grandeur"], df_gr_norm.columns.tolist())

    # Empty dataset: return an empty dict and an all-zero matrix.
    if df_gr_norm.empty :
        dico_gp = dict();
        matE_gp = pd.DataFrame(columns=cols,index=cols)
        matE_gp.fillna(0, inplace = True);
        logging.debug("correlation_by_shapelet_grandeur : matE_(%s) is empty: all columns are empty",
                      args["grandeur"])
        return dico_gp, matE_gp;

    # Label each edge (column) and gather (series, label) pairs for the
    # shapelet extraction.
    label_aretes = shapelet.label_arcs(df_gr_norm.columns.tolist());
    data = [];
    for arete, label in label_aretes.items():
        data.append((df_gr_norm[arete].values, label))
    shapelet_dict = dict();
    shapelet_dict = shapelet.extract_shapelets(data, args["min_len"], \
                                               args["max_len"]);
    df_data = pd.DataFrame(shapelet_dict);
    columns = df_gr_norm.columns;

    # Normalise the shapelet features before correlating them.
    print("df_data: mean={}, std={}".format(df_data.mean(), df_data.std()))
    df_data = distance_similarite.normalize(df_data)

    # Pairwise correlation (pearson distance) over every column pair;
    # the absolute value is stored symmetrically.
    dico_gp = dict();
    matE_gp = pd.DataFrame(columns=cols,index=cols);
    for arete1, arete2 in fct_aux.range_2d(columns):
        correl = distance_similarite.distance_pearson(df_data[arete1], \
                                                      df_data[arete2]);
        matE_gp.loc[arete1, arete2] = abs(correl);
        matE_gp.loc[arete2, arete1] = abs(correl);
        dico_gp[(arete1,arete2)] = abs(correl);
        logging.debug("correlation_by_shapelet_grandeur : (%s, %s) = %s", \
                      arete1, arete2, abs(correl))

    # Optional post-adjustment of the correlations; dico_gp is rebuilt from
    # the adjusted matrix so both return values stay consistent.
    if "modifCorrel" in args.keys() and args["modifCorrel"]:
        args["matE_gp"] = matE_gp;
        matE_gp = ajusterCorrelation.ajouter_mesures_correlations(args);
        dico_gp_tmp = dict();
        for arc1, arc2 in fct_aux.range_2d(matE_gp.columns):
            dico_gp_tmp[(arc1,arc2)] = matE_gp.loc[arc1, arc2]
        dico_gp = dico_gp_tmp;

    matE_gp.fillna(0, inplace = True);
    matE_gp.to_csv(args["chemin_matrices"]+"matrice_adjacence_proba_"+\
                   args["grandeur"]+".csv");
    analyse(dico_gp, args["grandeur"],args,critere=0.9);
    print("grandeur {} termine".format( args["grandeur"]) )
    logging.debug("correlation_by_shapelet_grandeur : grandeur %s termine", \
                  args["grandeur"])
    return dico_gp, matE_gp;
def comparateur_correlation(matE_gr, matE_gr_cp, cpt_modif):
    """
    Compare two correlation matrices cell by cell and report how many
    correlations were modified.

    Parameters
    ----------
    matE_gr : pd.DataFrame
        correlation matrix after modification.
    matE_gr_cp : pd.DataFrame
        copy of the correlation matrix before modification.
    cpt_modif : int
        number of modifications actually performed (used for reporting only).

    Returns
    -------
    (int, list)
        number of differing cells and their (row, col) positions, or
        (np.nan, []) when the two matrices do not share the same columns.
    """
    if set(matE_gr.columns.tolist()) != set(matE_gr_cp.columns.tolist()):
        print("matE_gr={} et matE_gr_cp={} sont de dimension differentes".format(\
              len(set(matE_gr.columns.tolist())),
              len(set(matE_gr_cp.columns.tolist()))))
        return np.nan, []

    cpt_diff = 0
    correls_diff = []
    cpt_ident = 0
    correls_ident = []
    cpt_case = 0
    for row, col in fct_aux.range_2d(matE_gr.columns):
        if matE_gr.loc[row, col] != matE_gr_cp.loc[row, col]:
            cpt_diff += 1
            correls_diff.append((row, col))
        else:
            cpt_ident += 1
            correls_ident.append((row, col))
        cpt_case += 1

    # BUGFIX: guard against an empty column set — the original divided by
    # cpt_case unconditionally and raised ZeroDivisionError.
    if cpt_case > 0:
        print("%correl_modif={}, %correl_ident={}, modif_faites={}".format(
              round(cpt_diff/cpt_case, 3),
              round(cpt_ident/cpt_case, 3),
              round(cpt_modif/cpt_case, 3)))

    # Map each differing cell to its [before, after (rounded)] values.
    dico_diff = dict()
    for corr in correls_diff:
        dico_diff[corr] = [matE_gr_cp.loc[corr[0], corr[1]],
                           round(matE_gr.loc[corr[0], corr[1]], 4)]
    print("aretes_modif={}".format(dico_diff))

    # Explicit return, consistent with the early-exit branch above
    # (the original fell off the end and implicitly returned None).
    return cpt_diff, correls_diff
def matrice_adjacence_aretes_from_groovyFile(args):
    """
    Build the adjacency matrix of the arcs from reseauG_reelles.json.

    Two arcs are adjacent (cell = 1) when they share an endpoint. The
    resulting matrix is written to chemin_matrices as "matE_reelles.csv"
    and returned.
    """
    # Source dictionary: either provided directly or loaded from the json.
    if "dico_json" not in args.keys():
        dico_json = fct_aux.export_from_json_to_dico(args["path_reseauG"])
    else:
        dico_json = args["dico_json"]

    # Collect each arc once, skipping an already-seen reversed duplicate.
    arcs_uniques = set()
    for src, destinations in dico_json.items():
        for dst in destinations:
            if (dst, src) not in arcs_uniques:
                arcs_uniques.add((src, dst))

    dico_adj = dict()
    noms_arcs = set()
    for arc_a, arc_b in fct_aux.range_2d(arcs_uniques):
        nom_a = arc_a[0] + "->" + arc_a[1]
        nom_b = arc_b[0] + "->" + arc_b[1]
        noms_arcs.add(nom_a)
        noms_arcs.add(nom_b)
        # Adjacent iff the two arcs share at least one endpoint.
        partage_sommet = arc_a[0] in (arc_b[0], arc_b[1]) or \
                         arc_a[1] in (arc_b[0], arc_b[1])
        if partage_sommet:
            dico_adj[(nom_a, nom_b)] = 1

    l_aretes_G = list(noms_arcs)
    matE_reel = pd.DataFrame(columns=l_aretes_G, index=l_aretes_G)
    for nom_a, nom_b in dico_adj:
        matE_reel.loc[nom_a, nom_b] = 1
        matE_reel.loc[nom_b, nom_a] = 1
    matE_reel.fillna(0, inplace=True)
    matE_reel.to_csv(args["chemin_matrices"] + "matE_reelles.csv")
    return matE_reel
def calculer_faux_pos_neg_correction(aretes_LG, matE):
    """
    Split the cells of matE covered by aretes_LG (in either orientation)
    into false positives (cell value 0) and false negatives (cell value 1).

    Returns (faux_pos_correct, faux_neg_correct) as lists of (row, col).
    """
    faux_pos_correct = []
    faux_neg_correct = []
    for row, col in fct_aux.range_2d(matE.columns):
        couverte = (row, col) in aretes_LG or (col, row) in aretes_LG
        if not couverte:
            continue
        valeur = matE.loc[row, col]
        if valeur == 0:
            faux_pos_correct.append((row, col))
        elif valeur == 1:
            faux_neg_correct.append((row, col))
    return faux_pos_correct, faux_neg_correct
def correlation_par_metriqueFusion(chemin_mat_equipement, fenetre, mode, args):
    """
    Build the symmetric correlation matrix between all equipments.

    Parameters
    ----------
    chemin_mat_equipement : str
        directory containing the "matrice_<name>.csv" equipment files.
    fenetre : int
        slicing-window size passed to distance_similarite.ts_slicing.
    mode : correlation mode passed to distance_similarite.ts_slicing.
    args : dict
        expects "derivate" (bool) and "interval_deriv" (gradient step).

    Returns
    -------
    mat_R : pd.DataFrame
        |correlation| per equipment pair, NaN filled with 0; also written
        to chemin_mat_equipement as "matE.csv".
    """
    matrices_equipment = distance_similarite.lire_matrice_equipements(
                            chemin_mat_equipement)
    mat_R = pd.DataFrame(index=matrices_equipment, columns=matrices_equipment)

    for nom_matA, nom_matB in fct_aux.range_2d(matrices_equipment):
        matA = _lire_matrice_equipement(chemin_mat_equipement, nom_matA)
        matB = _lire_matrice_equipement(chemin_mat_equipement, nom_matB)

        # Optional differentiation of the raw series (axis-1 gradient).
        if args["derivate"] == True:
            # BUGFIX: the original passed the *name* of the index column
            # (a string) as the new DataFrame index; reuse the real index.
            matA = pd.DataFrame(np.gradient(matA.values, args["interval_deriv"])[1],
                                columns=matA.columns.tolist(), index=matA.index)
            matB = pd.DataFrame(np.gradient(matB.values, args["interval_deriv"])[1],
                                columns=matB.columns.tolist(), index=matB.index)

        # One |correlation| per window, then aggregated by metrique_slicing.
        correlations = list()
        for ts_a, ts_b in distance_similarite.ts_slicing(matA, matB, fenetre, mode):
            r_ab_tmp = abs(distance_similarite.metriqueFusion_correlation(ts_a,
                                                                          ts_b))
            print("ts_a = {}, ts_b = {}, r_ab_tmp = {}"\
                  .format(len(ts_a), len(ts_b), r_ab_tmp))
            correlations.append(r_ab_tmp)

        r_ab = distance_similarite.metrique_slicing(correlations)
        logging.debug("matA = %s, matB = %s, r_ab = %s, correls=%s ", nom_matA,
                      nom_matB, r_ab, correlations)
        # BUGFIX: single .loc[row, col] assignment instead of the chained
        # .loc[row][col], which assigns into a temporary copy.
        mat_R.loc[nom_matA, nom_matB] = math.fabs(r_ab)
        mat_R.loc[nom_matB, nom_matA] = math.fabs(r_ab)
        print("(",nom_matA,",",nom_matB,"):",r_ab)

    mat_R.fillna(0, inplace = True)
    mat_R.to_csv(chemin_mat_equipement+"matE.csv")
    return mat_R


def _lire_matrice_equipement(chemin, nom):
    """Read matrice_<nom>.csv, indexed by 'timestamp' when that column is
    present, otherwise by the unnamed first column."""
    mat = pd.read_csv(chemin + "matrice_" + nom + ".csv")
    if "timestamp" in mat.columns.tolist():
        return mat.set_index("timestamp")
    return mat.set_index("Unnamed: 0")
def selectionner_cases_0_1(M_C):
    """
    Partition the off-diagonal cells of the binary matrix M_C into cells
    at 0 and cells at 1, printing their proportions.

    Returns
    -------
    (cases_0, cases_1) : lists of (row, col) tuples.
    """
    cases_0, cases_1 = list(), list()
    cpt_cases = 0
    for row, col in fct_aux.range_2d(M_C.columns):
        cpt_cases += 1
        if row != col and M_C.loc[row, col] == 0:
            cases_0.append((row, col))
        elif row != col and M_C.loc[row, col] == 1:
            cases_1.append((row, col))

    # BUGFIX: the original divided by len(cases_0)+len(cases_1) without a
    # guard, raising ZeroDivisionError when no off-diagonal 0/1 cell exists.
    total = len(cases_0) + len(cases_1)
    if total > 0:
        print("cpt_cases ={}, cases_0_1={}, cases_0={}, cases_1={}".format(\
              cpt_cases, total,
              round(len(cases_0)/total, 3),
              round(len(cases_1)/total, 3)))
    return cases_0, cases_1
def find_correl_arc_entrant_sortant_sommet(sommet, matE_gr):
    """
    Sum the pairwise correlations between the outgoing arcs (source ==
    sommet) and between the incoming arcs (target == sommet), printing
    the intermediate values. Returns nothing (diagnostic helper).
    """
    # Outgoing arcs first; an arc whose source is `sommet` is never
    # counted as incoming (mirrors the original if/elif).
    arcs_plus2 = [a for a in matE_gr.columns if a.split("->")[0] == sommet]
    arcs_minus2 = [a for a in matE_gr.columns
                   if a.split("->")[0] != sommet and a.split("->")[1] == sommet]
    print("sommet={}, arcs_minus2={}, arcs_plus2={}".format(
        sommet, arcs_minus2, arcs_plus2))

    m_minus2 = 0
    dico_minus = dict()
    for a1, a2 in fct_aux.range_2d(arcs_minus2):
        valeur = matE_gr.loc[a1, a2]
        m_minus2 += valeur
        dico_minus[(a1, a2)] = valeur

    m_plus2 = 0
    dico_plus = dict()
    for a1, a2 in fct_aux.range_2d(arcs_plus2):
        valeur = matE_gr.loc[a1, a2]
        m_plus2 += valeur
        dico_plus[(a1, a2)] = valeur

    print("sommet={}, m_minus2={}, m_plus2={}".format(sommet, m_minus2, m_plus2))
    print("plus={}, minus={}".format(dico_plus, dico_minus))
def matrice_binaire_seuil(M_C, matE_LG, seuil):
    """
    Binarise the probability matrix M_C at `seuil` and compare the result
    with the reference line-graph matrix matE_LG.

    Returns
    -------
    matE : pd.DataFrame   binarised copy of M_C (symmetric writes).
    faux_pos : list       cells at 1 in matE but 0 in matE_LG.
    faux_neg : list       cells at 0 in matE but 1 in matE_LG.
    """
    matE = M_C.copy()
    faux_pos = []
    faux_neg = []
    for u, v in fct_aux.range_2d(M_C.columns.tolist()):
        bit = 1 if M_C.loc[u, v] > seuil else 0
        matE.loc[u, v] = bit
        matE.loc[v, u] = bit
        if matE_LG.loc[u, v] == 0 and bit == 1:
            faux_pos.append((u, v))
        elif matE_LG.loc[u, v] == 1 and bit == 0:
            faux_neg.append((u, v))
    return matE, faux_pos, faux_neg
def modifier_matE_same_degre(matE, aretes, M, cpt_modif):
    """
    Write the value M into every valid (arete1, arete2) cell of matE
    (symmetrically) for the pairs produced by range_2d over `aretes`,
    counting the modifications performed.

    Returns the (mutated) matE and the updated cpt_modif.
    """
    for cand1, cand2 in fct_aux.range_2d(aretes):
        row, col = verifier_row_col(cand1, cand2, matE)
        # Branch order mirrors the original: an equal pair (including the
        # None == None case) is reported as identical, a half-resolved
        # pair as not belonging to the matrix.
        if row == col:
            print("12 {}, {} identiques".format(row, col))
        elif row is not None and col is not None:
            print("12 ({},{})={}".format(row, col, M))
            matE.loc[row, col] = M
            matE.loc[col, row] = M
            cpt_modif += 1
        else:
            print("12 aretes {},{} dont belong to matE_gr".format(
                cand1, cand2))
    return matE, cpt_modif
def aretes_orientes(matE_reel, oriente=True):
    """
    Return every arc of the real graph/subgraph.

    A cell at 1 yields (row, col); when oriente is False the reversed
    (col, row) arc is added as well. Cells not at 1 are reported on stdout.
    """
    arcs = list()
    for row, col in fct_aux.range_2d(matE_reel.columns.tolist()):
        if matE_reel.loc[row, col] == 1:
            arcs.append((row, col))
            if not oriente:
                arcs.append((col, row))
        else:
            print(
                "row={},col={} do not have a corresponding case in matE_reel".
                format(row, col))
    return arcs
def sommer_correl_sortantes_or_entrantes(aretes_plus, matE_gr):
    """
    Average the correlations over every pair of incoming/outgoing edges
    that verifier_row_col resolves in matE_gr.

    Returns 0 when no pair could be resolved.
    """
    somme = 0
    nb_paires = 0
    for cand1, cand2 in fct_aux.range_2d(aretes_plus):
        arete1, arete2 = verifier_row_col(cand1, cand2, matE_gr)
        if arete1 is None or arete2 is None:
            print("m_plus/minus: {} et {} dont belong to matE_gr".format(
                cand1, cand2))
            continue
        nb_paires += 1
        somme += matE_gr.loc[arete1, arete2]
    return somme / nb_paires if nb_paires else 0
def create_matE_grandeur(grandeur, epsilon = 0.8, chemin_data = "data/datasets/",\
                         chemin_mat = "data/matrices/"):
    """
    Build the correlation matrix of one quantity from its SAX similarity
    dictionary and return the dict of arc pairs with their correlation.

    Parameters
    ----------
    grandeur : str
        quantity name; "dataset_<grandeur>.csv" is read from chemin_data.
    epsilon : float
        threshold passed to create_dico_similarite.
    chemin_data, chemin_mat : str
        input/output directories.

    Returns
    -------
    dico_gp : dict
        (row, col) -> value of df at that cell (before the final fillna).
    """
    t1 = time.time()
    data = pd.read_csv(chemin_data + 'dataset_' + grandeur + '.csv')
    # Index by "timestamp" when present, else by the unnamed first column.
    if "timestamp" in data.columns.tolist():
        data = data.set_index("timestamp")
    else:
        data = data.set_index("Unnamed: 0")
    logging.debug("correlation_sax = %s columns = %s", grandeur,
                  data.columns.tolist())

    dict_similitude = create_dico_similarite(data, epsilon)
    # NOTE(review): the meaning of the constant 10 is inferred from the
    # callee's name (drop large distances) — confirm against its definition.
    supp_dict_similitude = supprimer_les_grands_distances(dict_similitude, 10)

    dico_gp = dict()
    cols = data.columns.tolist()
    df = pd.DataFrame(index=cols, columns=cols)
    for cle, dico_valeur in supp_dict_similitude.items():
        for cle_dico_valeur, valeur in dico_valeur.items():
            # Arc name = part of the key before the first "_", upper-cased.
            arete0 = cle.split("_")[0].upper()
            arete1 = cle_dico_valeur.split("_")[0].upper()
            df.loc[arete0, arete1] = valeur
            df.loc[arete1, arete0] = valeur

    # Rescale every cell: x -> (100 - x) / 100.
    df = df.apply(lambda x: (100 - x) / 100)
    # NOTE(review): after the rescaling a cell equals 100 only when the raw
    # value was -9900; this line may have been intended to run *before* the
    # apply above — confirm.
    df[df == 100] = 1

    for row, col in fct_aux.range_2d(cols):
        dico_gp[(row, col)] = df.loc[row, col]
    df.fillna(0, inplace=True)
    df.to_csv(chemin_mat + 'matrice_adjacence_proba_' + grandeur + '.csv')
    logging.debug("correlation_sax: grandeur = %s termine en = %s", grandeur,
                  time.time() - t1)
    return dico_gp
def G_nn_commentaire(N, M):
    """
    Build an N x M grid graph ("G_nn").

    Every cell (row, col) is a vertex labelled by a string counter stored
    in dico_sommets; each vertex is linked to its 4-neighbours, then extra
    links are added between the corner vertices.

    Returns
    -------
    M_G_nn : pd.DataFrame
        0/1 adjacency matrix, re-indexed by the vertex labels at the end.
    dico_sommets : dict
        (row, col) -> vertex label (string counter).
    aretes_G_nn : set
        edges as (label, label) tuples, rebuilt from M_G_nn at the end.
    dico_proba_cases : dict
        edge -> 0.5.
    """
    sommets = list(it.product(range(N), range(M), repeat=1))
    # creation dataframe avec colonnes = liste de sommets G_nn
    M_G_nn = pd.DataFrame(0, columns=sommets, index=sommets)
    dico_sommets = dict()
    cpt = 0
    aretes_G_nn_row_col = set()
    aretes_G_nn = set()
    dico_proba_cases = dict()

    for sommet in sommets:
        row = sommet[0]
        col = sommet[1]
        cpt += 1
        dico_sommets[(row, col)] = str(cpt)

        # Right neighbour (row, col+1).
        if col + 1 < M:
            M_G_nn.at[(row, col), (row, col + 1)] = 1
            M_G_nn.at[(row, col + 1), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row, col + 1)])
            if (row,col+1) not in dico_sommets.keys() and \
                (col+1,row) not in dico_sommets.keys():
                cpt += 1
                dico_sommets[(row, col + 1)] = str(cpt)
            elif (row, col + 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row, col + 1)]))
            elif (col + 1, row) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col + 1, row)]))

        # Left neighbour (row, col-1).
        if col - 1 >= 0:
            M_G_nn.at[(row, col), (row, col - 1)] = 1
            M_G_nn.at[(row, col - 1), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row, col - 1)])
            if (row,col-1) not in dico_sommets.keys() and \
                (col-1,row) not in dico_sommets.keys() :
                cpt += 1
                dico_sommets[(row, col - 1)] = str(cpt)
            elif (row, col - 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row, col - 1)]))
            elif (col - 1, row) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col - 1, row)]))

        # Lower neighbour (row+1, col).
        # NOTE(review): unlike the col+-1 cases, the edge is also recorded
        # when the neighbour label is freshly created — confirm asymmetry.
        if row + 1 < N:
            M_G_nn.at[(row, col), (row + 1, col)] = 1
            M_G_nn.at[(row + 1, col), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row + 1, col)])
            if (row+1,col) not in dico_sommets.keys() and \
                (col,row+1) not in dico_sommets.keys():
                cpt += 1
                dico_sommets[(row + 1, col)] = str(cpt)
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row + 1, col)]))
            elif (row + 1, col) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row + 1, col)]))
            elif (col, row + 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col, row + 1)]))

        # Upper neighbour (row-1, col).
        if row - 1 >= 0:
            M_G_nn.at[(row, col), (row - 1, col)] = 1
            M_G_nn.at[(row - 1, col), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row - 1, col)])
            if (row-1,col) not in dico_sommets.keys() and \
                (col,row-1) not in dico_sommets.keys():
                cpt += 1
                dico_sommets[(row - 1, col)] = str(cpt)
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row - 1, col)]))
            elif (row - 1, col) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row - 1, col)]))
            elif (col, row - 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col, row - 1)]))

    # Corner links: (0,0) <-> (0,M-1) and (0,0) <-> (N-1,0).
    M_G_nn.at[(0, 0), (0, M - 1)] = 1
    M_G_nn.at[(0, M - 1), (0, 0)] = 1
    M_G_nn.at[(N - 1, 0), (0, 0)] = 1
    M_G_nn.at[(0, 0), (N - 1, 0)] = 1
    aretes_G_nn_row_col.update([(0, 0), (0, M - 1), (N - 1, 0)])
    if (0, M - 1) not in dico_sommets.keys() and (
            M - 1, 0) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(0, M - 1)] = str(cpt)
    if (N - 1, 0) not in dico_sommets.keys() and (
            0, N - 1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, 0)] = str(cpt)
    if (dico_sommets[(0,M-1)],dico_sommets[(0,0)]) not in aretes_G_nn and \
        (dico_sommets[(N-1,0)],dico_sommets[(0,0)]) not in aretes_G_nn:
        aretes_G_nn.update([(dico_sommets[(0,0)],dico_sommets[(0,M-1)]),\
                            (dico_sommets[(0,0)],dico_sommets[(N-1,0)])])

    # Corner links: (N-1,0) <-> (N-1,M-1) and (N-1,0) <-> (0,0).
    M_G_nn.at[(N - 1, 0), (N - 1, M - 1)] = 1
    M_G_nn.at[(N - 1, M - 1), (N - 1, 0)] = 1
    M_G_nn.at[(N - 1, 0), (0, 0)] = 1
    M_G_nn.at[(0, 0), (N - 1, 0)] = 1
    aretes_G_nn_row_col.update([(N - 1, 0), (N - 1, M - 1), (0, 0)])
    if (N-1,M-1) not in dico_sommets.keys() and \
        (M-1,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, M - 1)] = str(cpt)
    if (N-1,0) not in dico_sommets.keys() and \
        (0,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, 0)] = str(cpt)
    if (dico_sommets[(N-1,M-1)],dico_sommets[(N-1,0)]) not in aretes_G_nn and \
        (dico_sommets[(0,0)],dico_sommets[(N-1,0)]) not in aretes_G_nn:
        aretes_G_nn.update([(dico_sommets[(N-1,0)],dico_sommets[(N-1,M-1)]),\
                            (dico_sommets[(N-1,0)],dico_sommets[(0,0)])])

    # Corner links: (0,M-1) <-> (N-1,M-1) and (0,M-1) <-> (0,0).
    M_G_nn.at[(0, M - 1), (N - 1, M - 1)] = 1
    M_G_nn.at[(N - 1, M - 1), (0, M - 1)] = 1
    M_G_nn.at[(0, M - 1), (0, 0)] = 1
    M_G_nn.at[(0, 0), (0, M - 1)] = 1
    aretes_G_nn_row_col.update([(0, M - 1), (N - 1, M - 1), (N - 1, 0)])
    if (N-1,M-1) not in dico_sommets.keys() and \
        (M-1,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, M - 1)] = str(cpt)
    if (0,M-1) not in dico_sommets.keys() and \
        (M-1,0) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(0, M - 1)] = str(cpt)
    if (dico_sommets[(N-1,M-1)],dico_sommets[(0,M-1)]) not in aretes_G_nn and \
        (dico_sommets[(0,0)],dico_sommets[(0,M-1)]) not in aretes_G_nn :
        aretes_G_nn.update([(dico_sommets[(0,M-1)],dico_sommets[(N-1,M-1)]),\
                            (dico_sommets[(0,M-1)],dico_sommets[(0,0)])])

    # Corner links: (N-1,M-1) <-> (0,M-1) and (N-1,M-1) <-> (N-1,0).
    M_G_nn.at[(N - 1, M - 1), (0, M - 1)] = 1
    M_G_nn.at[(0, M - 1), (N - 1, M - 1)] = 1
    M_G_nn.at[(N - 1, M - 1), (N - 1, 0)] = 1
    M_G_nn.at[(N - 1, 0), (N - 1, M - 1)] = 1
    aretes_G_nn_row_col.update([(N - 1, M - 1), (0, M - 1), (N - 1, 0)])
    if (N-1,M-1) not in dico_sommets.keys() and \
        (M-1,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, M - 1)] = str(cpt)
    if (0,M-1) not in dico_sommets.keys() and \
        (M-1,0) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(0, M - 1)] = str(cpt)
    if (N-1,0) not in dico_sommets.keys() and \
        (0,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, 0)] = str(cpt)
    # NOTE(review): the next condition looks up (M-1, N-1) and (M-1, 0),
    # i.e. transposed coordinates; unless the grid is square these keys may
    # not exist and raise KeyError — confirm intent.
    if (dico_sommets[(M-1,N-1)],dico_sommets[(M-1,0)]) not in aretes_G_nn and \
        (dico_sommets[(N-1,0)],dico_sommets[(N-1,M-1)]) not in aretes_G_nn:
        aretes_G_nn.update([(dico_sommets[(N-1,M-1)],dico_sommets[(0,M-1)]),\
                            (dico_sommets[(N-1,M-1)],dico_sommets[(N-1,0)])])

    # Rename index and columns with the vertex labels.
    M_G_nn.rename(columns=dico_sommets, inplace=True)
    M_G_nn.rename(index=dico_sommets, inplace=True)
    print("aretes_G_nn")

    # Rebuild the edge set from the (relabelled) adjacency matrix.
    # NOTE(review): range_2d receives the DataFrame itself here, while the
    # rest of the file passes .columns — confirm range_2d accepts both.
    aretes_G_nn = set()
    for row, col in fct_aux.range_2d(M_G_nn):
        if M_G_nn.at[row, col] == 1 and (row, col) not in aretes_G_nn:
            aretes_G_nn.add((row, col))
            dico_proba_cases[(row, col)] = 0.5

    print("M_G_nn=\n{}\n, dico_sommets={}\n, aretes_G_nn = {}\n, dico_proba_cases={}"\
          .format(M_G_nn, dico_sommets, aretes_G_nn, dico_proba_cases))
    return M_G_nn, dico_sommets, aretes_G_nn, dico_proba_cases
    pass
def correlation_1e_grandeur(grandeur, args):
    """
    Compute the correlation matrix associated with one quantity (grandeur).

    Parameters
    ----------
    grandeur : str
        quantity name; "dataset_<grandeur>.csv" is read from
        args["chemin_datasets"].
    args : dict
        expects "chemin_datasets", "chemin_matrices", "metrique_distance",
        "fenetre" (int window size) and optionally "modifCorrel".

    Returns
    -------
    dico_gp : dict   (arc1, arc2) -> |correlation|.
    matE_gp : pd.DataFrame  symmetric correlation matrix, NaN filled with 0;
        also written to chemin_matrices.
    """
    df_gr = pd.read_csv(args["chemin_datasets"] + "dataset_" + grandeur + ".csv")
    if "timestamp" in df_gr.columns.tolist():
        df_gr = df_gr.set_index("timestamp")
    else:
        df_gr = df_gr.set_index("Unnamed: 0")
    logging.debug("correlation_1e_grandeur = %s columns = %s",
                  grandeur, df_gr.columns.tolist())

    cols = df_gr.columns.tolist()
    matE_gp = pd.DataFrame(index=cols, columns=cols)
    dico_gp = dict()
    if df_gr.empty:
        matE_gp.fillna(0, inplace=True)
        logging.debug("matE_(%s) is empty: all columns are empty", grandeur)
        return dico_gp, matE_gp

    # z-normalisation, forward-fill of the holes, then 20-point smoothing.
    df_gr_norm = (df_gr - df_gr.mean(skipna=True)) / df_gr.std(skipna=True)
    # BUGFIX: fillna(method='pad') is deprecated/removed in recent pandas.
    df_gr_norm = df_gr_norm.ffill()
    df_gr_norm = df_gr_norm.rolling(window=20).mean()
    print("*** df_gr_norm = {}".format(df_gr_norm.columns.tolist()))

    for arc1, arc2 in fct_aux.range_2d(df_gr_norm.columns.tolist()):
        correl_max = 0
        corr_max_arc1 = 0
        corr_max_arc2 = 0
        correl_tmp = calcul_correlation_slicing(df_gr_norm[arc1],
                                                df_gr_norm[arc2],
                                                args["metrique_distance"],
                                                args["fenetre"])
        # Distance-like metrics are rescaled to [0, 1] by the
        # self-correlation of each arc shifted by one window.
        if args["metrique_distance"] not in ["pearson", "distance_pearson",
                                             "metrique_wil_histo",
                                             "metrique_pearson_damien"]:
            # BUGFIX: .ix was removed from pandas; the window offset is
            # positional, so use .iloc.
            corr_max_arc1 = calcul_correlation_slicing(
                                df_gr_norm[arc1],
                                df_gr_norm[arc1].iloc[args["fenetre"]:],
                                args["metrique_distance"], args["fenetre"])
            corr_max_arc2 = calcul_correlation_slicing(
                                df_gr_norm[arc2],
                                df_gr_norm[arc2].iloc[args["fenetre"]:],
                                args["metrique_distance"], args["fenetre"])
            correl_max = max(corr_max_arc1, corr_max_arc2)
            correl_tmp = value_0_1(correl_tmp, correl_max)
        correl = correl_tmp

        # Targeted debug traces kept from the original implementation.
        if (arc1 == "GF2" or arc2 == "GF2") and \
            (arc1 == "R495" or arc2 == "R495"):
            print("correl={}, arc1={}, correl_max_arc1={}, arc2={}, \
correl_max_arc2={}, correl_tmp={}"\
                  .format(correl, arc1, corr_max_arc1, arc2,
                          corr_max_arc2, correl_tmp))
        if (arc1 == "R495" or arc2 == "R495") and \
            (arc1 == "TGBT4" or arc2 == "TGBT4"):
            print("correl={}, arc1={}, arc2={}".format(correl, arc1, arc2))

        dico_gp[(arc1, arc2)] = abs(correl)
        matE_gp.loc[arc1, arc2] = abs(correl)
        matE_gp.loc[arc2, arc1] = abs(correl)
        logging.debug("(%s, %s) = %s ,correl_tmp=%s,correl_max=%s matE_grand = %s ,count: arc1 = %s, arc2 = %s", \
                      arc1, arc2, correl, correl_tmp, correl_max,
                      matE_gp.loc[arc1, arc2],
                      df_gr_norm[arc1].count(), df_gr_norm[arc2].count())

    matE_gp.fillna(0, inplace=True)
    args["grandeur"] = grandeur
    # Optional post-adjustment of the correlations; dico_gp is rebuilt from
    # the adjusted matrix so both return values stay consistent.
    if "modifCorrel" in args.keys() and args["modifCorrel"]:
        args["matE_gp"] = matE_gp
        matE_gp = ajusterCorrelation.ajouter_mesures_correlations(args)
        dico_gp_tmp = dict()
        for arc1, arc2 in fct_aux.range_2d(matE_gp.columns):
            dico_gp_tmp[(arc1, arc2)] = matE_gp.loc[arc1, arc2]
        dico_gp = dico_gp_tmp
    matE_gp.to_csv(args["chemin_matrices"] +
                   "matrice_adjacence_proba_" + grandeur + ".csv")
    analyse(dico_gp, grandeur, args, critere=0.9)
    print("grandeur ", grandeur, " termine")
    return dico_gp, matE_gp
def G_nn_new_avec_Cpt(N, M):
    """
    Build an N x M grid graph ("G_nn") — variant of G_nn_commentaire.

    Every cell (row, col) is a vertex labelled by a string counter stored
    in dico_sommets; each vertex is linked to its 4-neighbours, then extra
    links are added between the corner vertices.

    Returns
    -------
    M_G_nn : pd.DataFrame
        0/1 adjacency matrix, re-indexed by the vertex labels at the end.
    dico_sommets : dict
        (row, col) -> vertex label (string counter).
    aretes_G_nn : set
        edges as (label, label) tuples, rebuilt from M_G_nn at the end.
    dico_proba_cases : dict
        edge -> 0.5.
    """
    sommets = list(it.product(range(N), range(M), repeat=1))
    # creation dataframe avec colonnes = liste de sommets G_nn
    M_G_nn = pd.DataFrame(0, columns=sommets, index=sommets)
    dico_sommets = dict()
    cpt = 0
    aretes_G_nn_row_col = set()
    aretes_G_nn = set()
    dico_proba_cases = dict()

    for sommet in sommets:
        row = sommet[0]
        col = sommet[1]
        cpt += 1
        dico_sommets[(row, col)] = str(cpt)

        # Right neighbour (row, col+1).
        if col + 1 < M:
            M_G_nn.at[(row, col), (row, col + 1)] = 1
            M_G_nn.at[(row, col + 1), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row, col + 1)])
            if (row,col+1) not in dico_sommets.keys() and \
                (col+1,row) not in dico_sommets.keys():
                cpt += 1
                dico_sommets[(row, col + 1)] = str(cpt)
            elif (row, col + 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row, col + 1)]))
            elif (col + 1, row) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col + 1, row)]))

        # Left neighbour (row, col-1).
        if col - 1 >= 0:
            M_G_nn.at[(row, col), (row, col - 1)] = 1
            M_G_nn.at[(row, col - 1), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row, col - 1)])
            if (row,col-1) not in dico_sommets.keys() and \
                (col-1,row) not in dico_sommets.keys() :
                cpt += 1
                dico_sommets[(row, col - 1)] = str(cpt)
            elif (row, col - 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row, col - 1)]))
            elif (col - 1, row) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col - 1, row)]))

        # Lower neighbour (row+1, col).
        # NOTE(review): unlike the col+-1 cases, the edge is also recorded
        # when the neighbour label is freshly created — confirm asymmetry.
        if row + 1 < N:
            M_G_nn.at[(row, col), (row + 1, col)] = 1
            M_G_nn.at[(row + 1, col), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row + 1, col)])
            if (row+1,col) not in dico_sommets.keys() and \
                (col,row+1) not in dico_sommets.keys():
                cpt += 1
                dico_sommets[(row + 1, col)] = str(cpt)
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row + 1, col)]))
            elif (row + 1, col) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row + 1, col)]))
            elif (col, row + 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col, row + 1)]))

        # Upper neighbour (row-1, col).
        if row - 1 >= 0:
            M_G_nn.at[(row, col), (row - 1, col)] = 1
            M_G_nn.at[(row - 1, col), (row, col)] = 1
            aretes_G_nn_row_col.update([(row, col), (row - 1, col)])
            if (row-1,col) not in dico_sommets.keys() and \
                (col,row-1) not in dico_sommets.keys():
                cpt += 1
                dico_sommets[(row - 1, col)] = str(cpt)
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row - 1, col)]))
            elif (row - 1, col) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(row - 1, col)]))
            elif (col, row - 1) in dico_sommets.keys():
                aretes_G_nn.add(
                    (dico_sommets[(row, col)], dico_sommets[(col, row - 1)]))

    # Corner links: (0,0) <-> (0,M-1) and (0,0) <-> (N-1,0).
    M_G_nn.at[(0, 0), (0, M - 1)] = 1
    M_G_nn.at[(0, M - 1), (0, 0)] = 1
    M_G_nn.at[(N - 1, 0), (0, 0)] = 1
    M_G_nn.at[(0, 0), (N - 1, 0)] = 1
    aretes_G_nn_row_col.update([(0, 0), (0, M - 1), (N - 1, 0)])
    if (0, M - 1) not in dico_sommets.keys() and (
            M - 1, 0) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(0, M - 1)] = str(cpt)
    if (N - 1, 0) not in dico_sommets.keys() and (
            0, N - 1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, 0)] = str(cpt)
    if (dico_sommets[(0,M-1)],dico_sommets[(0,0)]) not in aretes_G_nn and \
        (dico_sommets[(N-1,0)],dico_sommets[(0,0)]) not in aretes_G_nn:
        aretes_G_nn.update([(dico_sommets[(0,0)],dico_sommets[(0,M-1)]),\
                            (dico_sommets[(0,0)],dico_sommets[(N-1,0)])])

    # Corner links: (N-1,0) <-> (N-1,M-1) and (N-1,0) <-> (0,0).
    M_G_nn.at[(N - 1, 0), (N - 1, M - 1)] = 1
    M_G_nn.at[(N - 1, M - 1), (N - 1, 0)] = 1
    M_G_nn.at[(N - 1, 0), (0, 0)] = 1
    M_G_nn.at[(0, 0), (N - 1, 0)] = 1
    aretes_G_nn_row_col.update([(N - 1, 0), (N - 1, M - 1), (0, 0)])
    if (N-1,M-1) not in dico_sommets.keys() and \
        (M-1,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, M - 1)] = str(cpt)
    if (N-1,0) not in dico_sommets.keys() and \
        (0,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, 0)] = str(cpt)
    if (dico_sommets[(N-1,M-1)],dico_sommets[(N-1,0)]) not in aretes_G_nn and \
        (dico_sommets[(0,0)],dico_sommets[(N-1,0)]) not in aretes_G_nn:
        aretes_G_nn.update([(dico_sommets[(N-1,0)],dico_sommets[(N-1,M-1)]),\
                            (dico_sommets[(N-1,0)],dico_sommets[(0,0)])])

    # Corner links: (0,M-1) <-> (N-1,M-1) and (0,M-1) <-> (0,0).
    M_G_nn.at[(0, M - 1), (N - 1, M - 1)] = 1
    M_G_nn.at[(N - 1, M - 1), (0, M - 1)] = 1
    M_G_nn.at[(0, M - 1), (0, 0)] = 1
    M_G_nn.at[(0, 0), (0, M - 1)] = 1
    aretes_G_nn_row_col.update([(0, M - 1), (N - 1, M - 1), (N - 1, 0)])
    if (N-1,M-1) not in dico_sommets.keys() and \
        (M-1,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, M - 1)] = str(cpt)
    if (0,M-1) not in dico_sommets.keys() and \
        (M-1,0) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(0, M - 1)] = str(cpt)
    if (dico_sommets[(N-1,M-1)],dico_sommets[(0,M-1)]) not in aretes_G_nn and \
        (dico_sommets[(0,0)],dico_sommets[(0,M-1)]) not in aretes_G_nn :
        aretes_G_nn.update([(dico_sommets[(0,M-1)],dico_sommets[(N-1,M-1)]),\
                            (dico_sommets[(0,M-1)],dico_sommets[(0,0)])])

    # Corner links: (N-1,M-1) <-> (0,M-1) and (N-1,M-1) <-> (N-1,0).
    M_G_nn.at[(N - 1, M - 1), (0, M - 1)] = 1
    M_G_nn.at[(0, M - 1), (N - 1, M - 1)] = 1
    M_G_nn.at[(N - 1, M - 1), (N - 1, 0)] = 1
    M_G_nn.at[(N - 1, 0), (N - 1, M - 1)] = 1
    aretes_G_nn_row_col.update([(N - 1, M - 1), (0, M - 1), (N - 1, 0)])
    if (N-1,M-1) not in dico_sommets.keys() and \
        (M-1,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, M - 1)] = str(cpt)
    if (0,M-1) not in dico_sommets.keys() and \
        (M-1,0) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(0, M - 1)] = str(cpt)
    if (N-1,0) not in dico_sommets.keys() and \
        (0,N-1) not in dico_sommets.keys():
        cpt += 1
        dico_sommets[(N - 1, 0)] = str(cpt)
    # NOTE(review): the next condition looks up (M-1, N-1) and (M-1, 0),
    # i.e. transposed coordinates; unless the grid is square these keys may
    # not exist and raise KeyError — confirm intent.
    if (dico_sommets[(M-1,N-1)],dico_sommets[(M-1,0)]) not in aretes_G_nn and \
        (dico_sommets[(N-1,0)],dico_sommets[(N-1,M-1)]) not in aretes_G_nn:
        aretes_G_nn.update([(dico_sommets[(N-1,M-1)],dico_sommets[(0,M-1)]),\
                            (dico_sommets[(N-1,M-1)],dico_sommets[(N-1,0)])])

    # Rename index and columns with the vertex labels.
    M_G_nn.rename(columns=dico_sommets, inplace=True)
    M_G_nn.rename(index=dico_sommets, inplace=True)
    print("aretes_G_nn")

    # Rebuild the edge set from the (relabelled) adjacency matrix.
    # NOTE(review): range_2d receives the DataFrame itself here, while the
    # rest of the file passes .columns — confirm range_2d accepts both.
    aretes_G_nn = set()
    for row, col in fct_aux.range_2d(M_G_nn):
        if M_G_nn.at[row, col] == 1 and (row, col) not in aretes_G_nn:
            aretes_G_nn.add((row, col))
            dico_proba_cases[(row, col)] = 0.5

    print("M_G_nn=\n{}\n, dico_sommets={}\n, aretes_G_nn = {}\n, dico_proba_cases={}"\
          .format(M_G_nn, dico_sommets, aretes_G_nn, dico_proba_cases))
    return M_G_nn, dico_sommets, aretes_G_nn, dico_proba_cases
    pass
def simulation_reel(cpt_SEUIL, arg_params, arg_chemins):
    """
    Run one real-data simulation for the threshold index cpt_SEUIL.

    The threshold value itself (SEUIL) is defined in
    arg_params["correl_seuil"]. The function binarises the probability
    matrix, runs the clique-discovery correction, selects the best node
    permutation, and writes the resulting distribution files and json
    summary to disk. Returns None.
    """
    # Output directory for the distribution files of this threshold.
    path_distr_chemin = arg_chemins["file"]+"/"+str(arg_params["mode_select_noeuds_1"])+"/"+\
                        "data_correl_seuil_"+str(arg_params["correl_seuil"])+"_"+\
                        arg_params["metrique_distance"]+"/distribution/"
    path_distr = Path(path_distr_chemin)
    path_distr.mkdir(parents=True, exist_ok=True)

    headers_df = ["correl_seuil", "nbre_aretes_matE", "nbre_aretes_LG", \
                  "dist_line", "nbre_aretes_diff_matE_LG", \
                  "liste_aretes_diff_matE_LG", "C_old", "C", "som_cout_min", \
                  "noeuds_corriges", "min_hamming","mean_hamming", \
                  "max_hamming","ecart_type", "max_cout", "max_permutation", \
                  "dico_som_min_permutations", "dico_dual_arc_sommet"]

    # Probability matrix -> binarised matrix at the chosen threshold.
    matE_proba = pd.read_csv(arg_chemins["chemin_matrices"]+arg_params["matE"]+\
                             "_"+arg_params["metrique_distance"]+".csv",
                             index_col = "Unnamed: 0")
    matE = matrice_binaire(matE_proba.copy(), arg_params["correl_seuil"])
    print("ici ok 1")

    # Dict of the correlations per cell, and numbering of the dual arcs.
    dico_proba_cases = dict()
    dico_dual_arc_sommet = dict()
    cpt_arete = 0
    for row, col in fct_aux.range_2d(matE_proba.columns.tolist()):
        dico_proba_cases[(row, col)] = matE_proba.loc[row][col]
        dico_dual_arc_sommet[str(cpt_arete)] = (row, col)
        cpt_arete += 1
    print("ici ok 2")

    # Algorithm correcting every node flagged -1.
    dico_permutation_cliq = dict()
    dico_permutation_cliq = \
        decouvClique.decouverte_cliques(matE.copy(), dico_dual_arc_sommet, \
                        arg_params["seuil_U"], arg_params["epsilon"], \
                        arg_chemins["chemin_datasets"], arg_chemins["chemin_matrices"],\
                        arg_params["ascendant_1"], arg_params["simulation"],\
                        dico_proba_cases,\
                        arg_params)
    print("ici ok 3")

    # Select the node permutation with the smallest Hamming distance.
    dico_sol = dict()
    dico_sol = simu50_PARALL.best_permutation(dico_permutation_cliq, matE, matE)
    print("ici ok 4")

    # Group the permutations by the value at index 6 of their result tuple.
    dico_som_min_permutations = dict()
    for l_noeuds_1, values in dico_permutation_cliq.items():
        if values[6] not in dico_som_min_permutations.keys():
            dico_som_min_permutations[values[6]] = [l_noeuds_1]
        else:
            dico_som_min_permutations[values[6]].append(l_noeuds_1)
    print("ici ok 5")

    # One summary row for this threshold, persisted via save_df.
    df = pd.DataFrame(columns=headers_df)
    df.loc[cpt_SEUIL] = [arg_params["correl_seuil"], \
                         dico_sol["nbre_aretes_matE"], \
                         dico_sol["nbre_aretes_LG"], \
                         dico_sol["dist_line"], \
                         dico_sol["nbre_aretes_diff_matE_LG"], \
                         dico_sol["liste_aretes_diff_matE_LG"], \
                         dico_sol["C_old"], dico_sol["C"], \
                         dico_sol["som_cout_min"], \
                         dico_sol["noeuds_corriges"], \
                         dico_sol["min_hamming"],dico_sol["mean_hamming"], \
                         dico_sol["max_hamming"],dico_sol["ecart_type"], \
                         dico_sol["max_cout"], dico_sol["max_permutation"], \
                         dico_som_min_permutations, dico_dual_arc_sommet \
                        ]
    simu50_PARALL.save_df(df, path_distr_chemin, cpt_SEUIL, headers_df)
    print("save df pour seuil = %s " % cpt_SEUIL)

    # Append the distance summary line for this threshold.
    # NOTE(review): this handle is never closed and the name `f` is reused
    # by the json `with` blocks below — consider an explicit close/with.
    f = open(path_distr_chemin + "distribution_DistLine_seuil.txt", "a")
    f.write(str(cpt_SEUIL)+";"+str(dico_sol["dist_line"])+";"+ \
            str(dico_sol["som_cout_min"])+";"+\
            str(dico_sol["nbre_aretes_diff_matE_LG"])+";"+\
            str(dico_sol["nbre_aretes_matE"])+"\n")

    # json file mapping each threshold to its list of line-graph edges.
    PATH = arg_chemins["chemin_equipements"] + "seuil_aretes_LG_" + arg_params[
        "metrique_distance"] + ".json"
    dico_json = dict()
    dico_correl = dict()
    dico_json = dict()
    dico_correl[arg_params["correl_seuil"]] = dico_sol["aretes_LG"]
    if os.path.isfile(PATH) and os.access(PATH, os.R_OK):
        print(" ok seuil ", arg_params["correl_seuil"])
        dico_json = json.load(open(PATH))
    else:
        with open(PATH, 'w') as f:
            json.dump(dico_correl, f)
    dico_json.update(dico_correl)
    with open(PATH, 'w') as f:
        json.dump(dico_json, f)
    pass
def gamma(reseauG, bool_entr_sort, df_gr, matE_gr):
    """
    Collect, for every vertex of the real graph, its outgoing or incoming
    arcs and the mean correlation between them.

    bool_entr_sort = True  -> incoming arcs (gamma- = entrant)
                   = False -> outgoing arcs (gamma+ = sortant)

        cols | y | t |           x ---> y --- \\__ t
        rows -----------         z __/
           x | 1   1
           z | 1

    Parameters
    ----------
    reseauG : pd.DataFrame
        0/1 adjacency matrix of the real graph (row -> col arcs).
    df_gr : pd.DataFrame
        measurement series per arc ("src->dst" columns); arcs whose column
        is entirely NaN are collected separately.
    matE_gr : pd.DataFrame
        correlation matrix between arc names.

    Returns
    -------
    dico : dict
        vertex -> [mean correlation over its arcs, list of arcs].
    dico_bar : dict
        vertex -> [0, list of arcs whose series is entirely NaN].
    """
    dico, dico_bar = dict(), dict()
    if bool_entr_sort == False:
        # m++ ==> outgoing arcs: scan each row of the adjacency matrix.
        for row in reseauG.index.tolist():
            aretes = list()
            aretes_bar = list()
            m_plus2 = 0
            for col in reseauG.columns.tolist():
                if reseauG.loc[row, col] == 1:
                    aretes.append(row + "->" + col)
                    # Record the arc when its series (in either naming
                    # orientation) is entirely NaN in df_gr.
                    if row + "->" + col in df_gr.columns and df_gr.loc[:, row +
                                                                       "->" +
                                                                       col].isnull(
                                                                       ).all():
                        aretes_bar.append(row + "->" + col)
                    elif col + "->" + row in df_gr.columns and df_gr.loc[:, col +
                                                                         "->" +
                                                                         row].isnull(
                                                                         ).all(
                                                                         ):
                        aretes_bar.append(row + "->" + col)
            if len(aretes) == 0:
                dico[row] = [0, aretes]
                dico_bar[row] = [0, aretes_bar]
            else:
                # Sum the pairwise correlations between the outgoing arcs
                # that exist in matE_gr, then average over len(aretes).
                for arete1, arete2 in fct_aux.range_2d(aretes):
                    if arete1 in matE_gr.columns.tolist(
                    ) and arete2 in matE_gr.columns.tolist():
                        m_plus2 += matE_gr.loc[arete1, arete2]
                dico[row] = [m_plus2 / len(aretes), aretes]
                dico_bar[row] = [0, aretes_bar]
    else:
        # m-- ==> incoming arcs: scan each column of the adjacency matrix.
        for col in reseauG.columns.tolist():
            aretes = list()
            aretes_bar = list()
            m_minus2 = 0
            for row in reseauG.index.tolist():
                if reseauG.loc[row, col] == 1:
                    aretes.append(row + "->" + col)
                    if row + "->" + col in df_gr.columns and df_gr.loc[:, row +
                                                                       "->" +
                                                                       col].isnull(
                                                                       ).all():
                        aretes_bar.append(row + "->" + col)
                    elif col + "->" + row in df_gr.columns and df_gr.loc[:, col +
                                                                         "->" +
                                                                         row].isnull(
                                                                         ).all(
                                                                         ):
                        aretes_bar.append(row + "->" + col)
            if len(aretes) == 0:
                dico[col] = [0, aretes]
                dico_bar[col] = [0, aretes_bar]
            else:
                for arete1, arete2 in fct_aux.range_2d(aretes):
                    if arete1 in matE_gr.columns.tolist(
                    ) and arete2 in matE_gr.columns.tolist():
                        m_minus2 += matE_gr.loc[arete1, arete2]
                dico[col] = [m_minus2 / len(aretes), aretes]
                dico_bar[col] = [0, aretes_bar]
    return dico, dico_bar