def prepare_communities(community_file, n_nodes):
    i = 0

    communities = {}  # Dictionary mapping a key (community id) to the list of member ids
    size = []  # List with the community sizes
    size_norm = []  # Community sizes normalized by the number of vertices in the ego network
    n_singletons = 0  # Number of singletons (communities formed by a single vertex)
    n_non_singletons = 0  # Number of non-singletons

    for line in community_file:
        i += 1
        key = "com" + str(i)  # Key for the communities dictionary - an identifier such as "com1"
        comm = []  # List to store the members of community i
        a = line.split(' ')
        for item in a:
            if item != "\n":
                comm.append(long(item))

        if len(comm) > 1:
            n_non_singletons += 1
        else:
            n_singletons += 1

        communities[key] = comm  # Store the member ids of this community under the key
        b = float(len(comm)) / float(n_nodes)
        size.append(len(comm))
        size_norm.append(b)

    n_comm = len(communities)  # Number of communities for this ego

    avg_size = calc.calcular(size)  # Sum/average the vector of community sizes...
    avg_size_norm = calc.calcular(size_norm)  # Sum/average the normalized community sizes...

    overlap = float(avg_size['soma']) / float(
        n_nodes
    )  # The overlap: the average number of communities to which each vertex belongs. This is the sum of the sizes of all communities (including singletons) divided by the number of vertices, n.

    return (communities, n_comm, size, avg_size['media'], size_norm,
            avg_size_norm['media'], overlap, n_singletons, n_non_singletons)
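All of these examples lean on a helper module `calc` that is never shown. Below is a minimal sketch of what it must provide, inferred only from the call sites (the keys 'soma', 'media', 'variancia' and 'desvio_padrao'); the real implementation may differ:

import math

def calcular(values):
    # Sum and mean of a list of numbers; None for an empty list (assumption).
    if not values:
        return None
    soma = float(sum(values))
    return {'soma': soma, 'media': soma / len(values)}

def calcular_full(values):
    # calcular() plus population variance and standard deviation (assumption).
    result = calcular(values)
    if result is None:
        return None
    variancia = sum((v - result['media']) ** 2 for v in values) / len(values)
    result['variancia'] = variancia
    result['desvio_padrao'] = math.sqrt(variancia)
    return result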
# Example 2
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    print(
        "\n######################################################################\n"
    )
    if os.path.isfile(str(output_dir) + str(net) + "_connected_comp.json"):
        print("File already exists: " + str(output_dir) + str(net) +
              "_connected_comp.json")
    else:

        print("Connected components - " + str(dataset_dir))

        cc = []  # Average size of the connected components per ego network
        cc_normal = []  # Average size of the connected components per ego network, normalized by the number of vertices of the graph
        n_cc = []  # Average number of connected components per ego network
        n = []  # Vector with the number of vertices for each ego network
        e = []  # Vector with the number of edges for each ego network
        i = 0

        for file in os.listdir(dataset_dir):

            i += 1
            print(
                str(output_dir) + str(net) + "/" + str(file) +
                " - Computing properties for ego " + str(i) + ": " +
                str(file))
            if IsDir is True:
                G = snap.LoadEdgeList(
                    snap.PNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
            else:
                G = snap.LoadEdgeList(
                    snap.PUNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file - may require a separator: snap.LoadEdgeList(snap.PUNGraph, file, 0, 1, '\t')


#			G.Dump()
#			time.sleep(5)

#####################################################################################

            n.append(G.GetNodes())  # Number of vertices
            e.append(G.GetEdges())  # Number of edges
            n_nodes = G.GetNodes()
            n_edges = G.GetEdges()

            #####################################################################################
            if n_edges == 0:
                a = 0
                cc.append(a)
                cc_normal.append(a)
                n_cc.append(a)
                print("No edges found for ego network " + str(i) +
                      " (" + str(file) + ")")
            else:
                Components = snap.TCnComV()
                snap.GetWccs(G, Components)
                _cc = []
                _cc_normal = []
                _n_cc = 0
                for CnCom in Components:
                    _cc.append(CnCom.Len())
                    b = float(CnCom.Len()) / float(n_nodes)
                    _cc_normal.append(b)
                    _n_cc += 1
                result = calc.calcular(_cc)
                cc.append(result['media'])

                result_normal = calc.calcular(_cc_normal)
                cc_normal.append(result_normal['media'])

                n_cc.append(_n_cc)
                print("Number of connected components for ego " +
                      str(i) + " (" + str(file) + "): " + str(_n_cc))
                print(
                    "Average size of the connected components for ego "
                    + str(i) + " (" + str(file) + "): " + str(result['media']))
                print(
                    "Average (normalized) size of the connected components for ego "
                    + str(i) + " (" + str(file) + "): " +
                    str(result_normal['media']))
                print

        N_CC = calc.calcular_full(n_cc)
        CC = calc.calcular_full(cc)
        CC_NORMAL = calc.calcular_full(cc_normal)

        overview = {}
        overview['Len_ConnectedComponents'] = CC
        overview['Len_ConnectedComponents_Normal'] = CC_NORMAL
        overview['N_ConnectedComponents'] = N_CC

        with open(str(output_dir) + str(net) + "_connected_comp.json",
                  'w') as f:
            f.write(json.dumps(overview))

        with open(str(output_dir) + str(net) + "_connected_comp.txt",
                  'w') as f:
            f.write(
                "\n######################################################################\n"
            )
            f.write(
                "Number_Connected_Comp: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
                % (N_CC['media'], N_CC['variancia'], N_CC['desvio_padrao']))
            f.write(
                "Length_Connected_Comp: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
                % (CC['media'], CC['variancia'], CC['desvio_padrao']))
            f.write(
                "Length_Connected_Comp_Normalized: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
                % (CC_NORMAL['media'], CC_NORMAL['variancia'],
                   CC_NORMAL['desvio_padrao']))
            f.write(
                "\n######################################################################\n"
            )

        print(
            "\n######################################################################\n"
        )
        print(
            "Number_Connected_Comp: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
            % (N_CC['media'], N_CC['variancia'], N_CC['desvio_padrao']))
        print(
            "Length_Connected_Comp: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
            % (CC['media'], CC['variancia'], CC['desvio_padrao']))
        print(
            "Length_Connected_Comp_Normalized: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
            % (CC_NORMAL['media'], CC_NORMAL['variancia'],
               CC_NORMAL['desvio_padrao']))
        print(
            "\n######################################################################\n"
        )
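A hypothetical invocation of the connected-components pass above; the directory layout and network name are placeholders, not taken from the original script (the weight argument is unused by this function):

# Hypothetical call: scan every ego-network edge list under the dataset
# directory, treating the graphs as undirected.
net_structure("/home/amaury/datasets/n1/", "/home/amaury/outputs/", "n1",
              False, None)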
def calculate_alg(singletons,net,uw,ud,g_type,alg):
	
	communities = "/home/amaury/communities_hashmap/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/" 

	graphs = "/home/amaury/graphs_hashmap_infomap_without_weight/"+str(net)+"/"+str(g_type)+"/"		#Todos os grafos sem peso...	
	
	out_ad = str(output_dir)+"average_degree/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"		
	out_c = str(output_dir)+"conductance/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"	
	out_cut_r = str(output_dir)+"cut_ratio/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_d = str(output_dir)+"density/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_e = str(output_dir)+"expansion/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_normal_cut = str(output_dir)+"normalized_cut/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_s = str(output_dir)+"separability/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	
	_avg_time = []
	
	if not os.path.exists(communities):
		print ("Directory with the communities not found: "+str(communities)+"\n")

	else:
		print("\n######################################################################")

		for threshold in os.listdir(communities):
			if not os.path.isdir(str(communities)+str(threshold)+"/"):
				print ("Threshold for network "+str(net)+" not found: "+str(threshold))

			else:
				partial_start = time.time()
				create_dirs(out_ad,out_c,out_cut_r,out_d,out_e,out_normal_cut,out_s)

				out_files = [out_ad, out_c, out_cut_r, out_d, out_e, out_normal_cut, out_s]
				if all(os.path.exists(str(p)+str(threshold)+".json") for p in out_files):
					print ("Destination file already exists: "+str(threshold)+".json")
					
				else:	
					print("######################################################################")
							
					average_degree = {}
					conductance = {}
					cut_ratio = {}
					density = {}
					expansion = {}
					normalized_cut = {}
					separability = {}
					
					i=0 		# Ego counter
					for file in os.listdir(str(communities)+str(threshold)+"/"):
						if os.path.isfile(str(communities)+str(threshold)+"/"+file):
							ego_id = file.split(".txt")
							ego_id = long(ego_id[0])
							i+=1

							if not os.path.isfile(str(graphs)+str(ego_id)+".edge_list"):
								print ("ERROR - EGO: "+str(i)+" - Edge list file not found: "+str(graphs)+str(ego_id)+".edge_list")

							else:
								with open(str(communities)+str(threshold)+"/"+file, 'r') as community_file:
									if ud is False:
										G = snap.LoadEdgeList(snap.PNGraph, str(graphs)+str(ego_id)+".edge_list", 0, 1)					# Load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
									else:
										G = snap.LoadEdgeList(snap.PUNGraph, str(graphs)+str(ego_id)+".edge_list", 0, 1)					# Load from a text file - may require a separator: snap.LoadEdgeList(snap.PUNGraph, file, 0, 1, '\t')

									print(str(g_type)+" - "+str(alg)+" - "+str(singletons)+" - Network: "+str(net)+" - THRESHOLD: "+str(threshold)+" - ego("+str(i)+"): "+str(file))
									
									communities_dict = prepare_communities(community_file)							# Returns a dictionary with the communities
									
									avg_ad,avg_c,avg_cut_r,avg_d,avg_e,avg_normal_cut,avg_s = metrics_v2.calc_metrics(communities_dict,G,ud)		# Compute the metrics
									
									average_degree[ego_id] = avg_ad
									conductance[ego_id] = avg_c
									cut_ratio[ego_id] = avg_cut_r
									density[ego_id] = avg_d
									expansion[ego_id] = avg_e
									normalized_cut[ego_id] = avg_normal_cut
									separability[ego_id] = avg_s	
			
									print ("Average Degree: "+str(avg_ad['media'])+" - Conductance: "+str(avg_c['media'])+" - Cut Ratio: "+str(avg_cut_r['media'])+" - Density: "+str(avg_d['media']))
									print ("Expansion: "+str(avg_e['media'])+" - Normalized Cut: "+str(avg_normal_cut['media'])+" - Separability: "+str(avg_s['media']))
									print 
					print("######################################################################")	

					with open(str(out_ad)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(average_degree))
						
					with open(str(out_c)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(conductance))

					with open(str(out_cut_r)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(cut_ratio))
											
					with open(str(out_d)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(density))

					with open(str(out_e)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(expansion))

					with open(str(out_normal_cut)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(normalized_cut))

					with open(str(out_s)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(separability))						
				
				partial_end = time.time()
				partial_time_exec = partial_end - partial_start
				print ("\nTempo de execução para o threshold "+str(threshold)+": "+str(partial_time_exec)+"\n")
				_avg_time.append(partial_time_exec)
	avg_time = calc.calcular(_avg_time)
	print ("\nTempo de médio de execução em cada threshold: "+str(avg_time)+"\n")
	print("\n######################################################################\n")		
# Example 4
def net_structure(dataset_dir, output_dir, graph_type, metric, net, alg):
    os.system('clear')
    print(
        "\n######################################################################\n"
    )
    print(
        "\nScript to compute the modularity of the detected communities\n")

    graphs_dir = "/home/amaury/graphs_hashmap_infomap_without_weight/" + str(
        net) + "/" + str(graph_type) + "/"

    if not os.path.exists(graphs_dir):
        print("Directory not found: " + str(graphs_dir))

    else:
        print(
            "\n######################################################################\n"
        )
        print(
            "\nScript to compute the modularity of the detected communities - Network "
            + str(net) + "\n")

        if not os.path.isdir(dataset_dir + str(net) + "/"):
            print("Directory with evaluations for network " + str(net) +
                  " not found: " + str(dataset_dir + str(net) + "/"))
        else:
            for threshold in os.listdir(dataset_dir + str(net) + "/"):
                if os.path.isfile(str(output_dir) + str(threshold) + ".json"):
                    print("Arquivo de destino já existe. " + str(output_dir) +
                          str(threshold) + ".json")
                else:

                    modularity = [
                    ]  # Vetor com a Média das modularidades de cada grafo
                    modularity_data = {
                    }  # Dicionário com o ego e as modularidades para cada comunidade
                    i = 0

                    for file in os.listdir(dataset_dir + str(net) + "/" +
                                           str(threshold) + "/"):
                        i += 1
                        ego_id = file.split(".txt")
                        ego_id = long(ego_id[0])
                        communities = []  # Store the communities of the ego network
                        m_file = []  # Modularity vector for the communities of ego i

                        try:
                            G = snap.LoadEdgeList(
                                snap.PNGraph,
                                str(graphs_dir) + str(ego_id) + ".edge_list",
                                0, 1
                            )  # Load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
                            n_edges = G.GetEdges()  # Number of edges of the graph

                            if n_edges == 0:
                                a = 0
                                m_file.append(a)
                            else:
                                try:
                                    with open(
                                            dataset_dir + str(net) + "/" +
                                            str(threshold) + "/" + str(file),
                                            'r') as f:
                                        for line in f:
                                            comm = []  # List to store one community
                                            a = line.split(' ')
                                            for item in a:
                                                if item != "\n":
                                                    comm.append(item)
                                            communities.append(comm)
                                except Exception as e:
                                    print(
                                        "\nERROR - Unable to load the communities: "
                                        + dataset_dir + str(net) + "/" +
                                        str(threshold) + "/" + str(file) +
                                        "\n")
                                    print e

                                for comm in communities:
                                    if comm is not None:
                                        Nodes = snap.TIntV()
                                        for nodeId in comm:
                                            if nodeId is not None:
                                                Nodes.Add(long(nodeId))
                                        m_file.append(
                                            snap.GetModularity(
                                                G, Nodes, n_edges)
                                        )  # Pass the number of edges of the graph as a parameter to speed up the computation

                        except Exception as e:
                            print(
                                "\nERROR - Unable to load the graph for ego: "
                                + str(ego_id) + "  --  " + str(graphs_dir) +
                                str(ego_id) + ".edge_list\n")
                            print e

                        _m_file = calc.calcular(m_file)
                        modularity_data[ego_id] = m_file
                        if _m_file is not None:
                            modularity.append(_m_file['media'])

                            print(
                                str(graph_type) + " - Network: " + str(net) +
                                " - Threshold: " + str(threshold) +
                                " - Modularity for ego " + str(i) + " (" +
                                str(file) + "): %5.3f" % (_m_file['media']))
                            print(
                                "######################################################################"
                            )
                    M = calc.calcular_full(modularity)

                    overview = None  # Initialized here so the check below cannot hit an undefined name when M is None
                    if M is not None:
                        overview = {
                            'threshold': threshold,
                            'modularity': M,
                            'modularity_data': modularity_data
                        }
                        print(
                            "\n######################################################################\n"
                        )
                        print(
                            "Network: %s   ---   Threshold: %s   ---   Modularity: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f"
                            % (net, threshold, M['media'], M['variancia'],
                               M['desvio_padrao']))
                        print(
                            "\n######################################################################\n"
                        )

                    if overview is not None:
                        with open(
                                str(output_dir) + str(threshold) + ".json",
                                'a+') as f:
                            f.write(json.dumps(overview) + "\n")

    print(
        "\n######################################################################\n"
    )
# Example 5
def calculate_alg(singletons, net, ud, g_type, alg):

    communities = "/home/amaury/communities_hashmap/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"

    graphs = "/home/amaury/graphs_hashmap_infomap_without_weight/" + str(
        net) + "/" + str(
            g_type
        ) + "/"  #Pega só o grafo sem peso para realizar os cálculos

    out_Q = str(output_dir) + "modularity/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_Qds = str(output_dir) + "modularity_density/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_intra_edges = str(output_dir) + "intra_edges/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_intra_density = str(output_dir) + "intra_density/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_contraction = str(output_dir) + "contraction/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_inter_edges = str(output_dir) + "inter_edges/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_expansion = str(output_dir) + "expansion/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_conductance = str(output_dir) + "conductance/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_modularity_degree = str(output_dir) + "modularity_degree/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"

    _avg_time = []

    if not os.path.exists(communities):
        print("Diretório com as comunidades não encontrado: " +
              str(communities) + "\n")

    else:
        print(
            "\n######################################################################"
        )

        for threshold in os.listdir(communities):
            if not os.path.isdir(str(communities) + str(threshold) + "/"):
                print("Threshold para a rede " + str(net) +
                      " não encontrado: " + str(threshold))

            else:
                partial_start = time.time()
                paths = [
                    out_Q, out_Qds, out_intra_edges, out_intra_density,
                    out_contraction, out_inter_edges, out_expansion,
                    out_conductance, out_modularity_degree
                ]
                create_dirs(paths)

                if all(
                        os.path.exists(str(p) + str(threshold) + ".json")
                        for p in paths):
                    print("Destination file already exists: " + str(out_Q) +
                          str(threshold) + ".json")

                else:
                    print(
                        "######################################################################"
                    )

                    Q = {}
                    Qds = {}
                    intra_edges = {}
                    intra_density = {}
                    contraction = {}
                    inter_edges = {}
                    expansion = {}
                    conductance = {}
                    modularity_degree = {}

                    i = 0  # Ego counter
                    for file in os.listdir(
                            str(communities) + str(threshold) + "/"):
                        if os.path.isfile(
                                str(communities) + str(threshold) + "/" +
                                file):
                            ego_id = file.split(".txt")
                            ego_id = long(ego_id[0])
                            i += 1

                            if not os.path.isfile(
                                    str(graphs) + str(ego_id) + ".edge_list"):
                                print(
                                    "ERROR - EGO: " + str(i) +
                                    " - Edge list file not found: "
                                    + str(graphs) + str(ego_id) + ".edge_list")

                            else:
                                community_file = str(communities) + str(
                                    threshold) + "/" + file
                                graph_file = str(graphs) + str(
                                    ego_id) + ".edge_list"

                                print("Ego: " + str(i) + " - " + communities +
                                      threshold + "/" + file)

                                if ud is False:  # For an unweighted, directed graph
                                    execute = subprocess.Popen(
                                        [
                                            "java", "CommunityQuality",
                                            str(graph_file),
                                            str(community_file), "isUnweighted"
                                        ],
                                        stdout=subprocess.PIPE)
                                else:  # For an unweighted, undirected graph
                                    execute = subprocess.Popen(
                                        [
                                            "java", "CommunityQuality",
                                            str(graph_file),
                                            str(community_file),
                                            "isUnweighted", "isUndirected"
                                        ],
                                        stdout=subprocess.PIPE)

                                resp = execute.communicate()[0]
                                print resp
                                value = resp.split(", ")
                                for item in value:
                                    item = item.split(" = ")

                                    if item[0] == "Q":
                                        Q[ego_id] = float(item[1])
                                    elif item[0] == "Qds":
                                        Qds[ego_id] = float(item[1])
                                    elif item[0] == "intraEdges":
                                        intra_edges[ego_id] = float(item[1])
                                    elif item[0] == "intraDensity":
                                        intra_density[ego_id] = float(item[1])
                                    elif item[0] == "contraction":
                                        contraction[ego_id] = float(item[1])
                                    elif item[0] == "interEdges":
                                        inter_edges[ego_id] = float(item[1])
                                    elif item[0] == "expansion":
                                        expansion[ego_id] = float(item[1])
                                    elif item[0] == "conductance":
                                        conductance[ego_id] = float(item[1])
                                    elif item[0] == "modularity degree":
                                        modularity_degree[ego_id] = float(
                                            item[1])

                    print(
                        "######################################################################"
                    )

                    with open(str(out_Q) + str(threshold) + ".json", "w") as f:
                        f.write(json.dumps(Q))

                    with open(str(out_Qds) + str(threshold) + ".json",
                              "w") as f:
                        f.write(json.dumps(Qds))

                    with open(
                            str(out_intra_edges) + str(threshold) + ".json",
                            "w") as f:
                        f.write(json.dumps(intra_edges))

                    with open(
                            str(out_intra_density) + str(threshold) + ".json",
                            "w") as f:
                        f.write(json.dumps(intra_density))

                    with open(
                            str(out_contraction) + str(threshold) + ".json",
                            "w") as f:
                        f.write(json.dumps(contraction))

                    with open(
                            str(out_inter_edges) + str(threshold) + ".json",
                            "w") as f:
                        f.write(json.dumps(inter_edges))

                    with open(
                            str(out_expansion) + str(threshold) + ".json",
                            "w") as f:
                        f.write(json.dumps(expansion))

                    with open(
                            str(out_conductance) + str(threshold) + ".json",
                            "w") as f:
                        f.write(json.dumps(conductance))

                    with open(
                            str(out_modularity_degree) + str(threshold) +
                            ".json", "w") as f:
                        f.write(json.dumps(modularity_degree))

                partial_end = time.time()
                partial_time_exec = partial_end - partial_start
                print("\nTempo de execução para o threshold " +
                      str(threshold) + ": " + str(partial_time_exec) + "\n")
                _avg_time.append(partial_time_exec)
    avg_time = calc.calcular(_avg_time)
    print("\nTempo de médio de execução em cada threshold: " +
          str(avg_time) + "\n")
    print(
        "\n######################################################################\n"
    )
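The parsing above implies that the external Java tool CommunityQuality writes its metrics to stdout as a single comma-separated line of name = value pairs. A hypothetical sample of what resp could look like, matching the keys handled in the loop (values made up for illustration):

# Q = 0.4123, Qds = 0.2871, intraEdges = 34.0, intraDensity = 0.61, contraction = 5.2, interEdges = 9.0, expansion = 1.4, conductance = 0.21, modularity degree = 12.7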
def calc_metrics(communities,G,uw,ud):
######################################################################################################################################################################



	average_degree = [] 
	conductance = []		
	cut_ratio = []
	density = []
	expansion = []
	normal_cut_ratio = []
	separability = []
	clustering = []

	if ud is False:													# For directed graphs...
		for k,community in communities.iteritems():
			_average_degree = 0
			_conductance = 0
			_cut_ratio = 0
			_density = 0
			_expansion = 0
			_normal_cut_ratio = 0
			_separability = 0
			
			average_degree.append(_average_degree)													# Store the result for each partition so the ego average can be computed later.
			conductance.append(_conductance)
			cut_ratio.append(_cut_ratio)
			density.append(_density)
			expansion.append(_expansion)
			normal_cut_ratio.append(_normal_cut_ratio)
			separability.append(_separability)
	
	else:																			# For undirected graphs...
		clustering_of_G = nx.clustering(G,weight='weight')				# Per-node clustering coefficients of the graph (nx.average_clustering returns a single float and cannot be indexed by node below).
		
		for k,community in communities.iteritems():
			_average_degree = 0
			_conductance = 0
			_cut_ratio = 0
			_density = 0
			_expansion = 0
			_normal_cut_ratio = 0
			_separability = 0
############################################################################################################ 	CLUSTERING COEFFICIENT	 
			_cc = []										# Collect the clustering coefficient of each node in the community
			for Node in community:
				try:
					_cc.append(clustering_of_G[Node])
				except Exception as e:
					print ("Error - "+str(e))
			if _cc:											# An empty list would break calc.calcular, so fall back to 0
				_clustering = calc.calcular(_cc)			# Average clustering coefficient of the community
				clustering.append(_clustering['media'])
			else:
				_clustering = 0
				clustering.append(_clustering)
#############################################################################################################				
				
			average_degree.append(_average_degree)													# Store the result for each partition so the ego average can be computed later.
			conductance.append(_conductance)
			cut_ratio.append(_cut_ratio)
			density.append(_density)
			expansion.append(_expansion)
			normal_cut_ratio.append(_normal_cut_ratio)
			separability.append(_separability)
				
	
	avg_ad = calc.calcular_full(average_degree)	
	avg_c = calc.calcular_full(conductance)
	avg_cut_r = calc.calcular_full(cut_ratio)
	avg_d = calc.calcular_full(density)
	avg_e = calc.calcular_full(expansion)
	avg_normal_cut = calc.calcular_full(normal_cut_ratio)
	avg_s = calc.calcular_full(separability)
	avg_cc = calc.calcular_full(clustering)		

	print avg_ad, avg_c, avg_cut_r, avg_d, avg_e, avg_normal_cut, avg_s, avg_cc
	time.sleep(5)
	return avg_ad, avg_c, avg_cut_r, avg_d, avg_e, avg_normal_cut, avg_s, avg_cc
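In this version of calc_metrics the seven structural metrics are placeholders fixed at 0 and only the clustering coefficient is actually computed. As an illustration only, a hedged sketch of how one stub (conductance) could be filled in for a networkx graph, using one common definition (edges cut over community volume); this is not the original author's implementation:

def community_conductance(G, community):
	# Conductance of one community: edges leaving the community divided by
	# the total degree (volume) of its members. Assumes an undirected,
	# simple networkx graph; weights are ignored.
	members = set(community)
	cut = 0
	volume = 0
	for node in members:
		for neighbor in G.neighbors(node):
			volume += 1
			if neighbor not in members:
				cut += 1
	if volume == 0:
		return 0.0
	return float(cut) / float(volume)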
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    print(
        "\n######################################################################\n"
    )
    if os.path.isfile(str(output_dir) + str(net) + "_net_struct.json"):
        print("File already exists: " + str(output_dir) + str(net) +
              "_net_struct.json")
    else:

        print("Dataset network structure - " + str(dataset_dir))
        n = []  # Average number of nodes per ego network
        e = []  # Average number of edges per ego network

        bc_n = []  # Average betweenness centrality of the nodes
        bc_e = []  # Average betweenness centrality of the edges

        i = 0

        for file in os.listdir(dataset_dir):
            i += 1
            print(
                str(output_dir) + str(net) +
                " - Computing properties for ego " + str(i) + ": " +
                str(file))
            if IsDir is True:
                G = snap.LoadEdgeList(
                    snap.PNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
            else:
                G = snap.LoadEdgeList(
                    snap.PUNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file - may require a separator: snap.LoadEdgeList(snap.PUNGraph, file, 0, 1, '\t')

#####################################################################################

            n.append(G.GetNodes())  # Number of vertices
            e.append(G.GetEdges())  # Number of edges
            n_nodes = G.GetNodes()
            n_edges = G.GetEdges()

            #####################################################################################
            if n_edges == 0 or n_nodes < 3:
                bc_n.append(0)
                bc_e.append(0)
            else:
                Nodes = snap.TIntFltH()
                Edges = snap.TIntPrFltH()
                snap.GetBetweennessCentr(
                    G, Nodes, Edges, 1.0,
                    IsDir)  # Betweenness centrality of nodes and edges
                _bc_n = []
                _bc_e = []
                if IsDir is True:
                    max_betweenness = (n_nodes - 1) * (n_nodes - 2)
                else:
                    max_betweenness = ((n_nodes - 1) * (n_nodes - 2)) / 2

                for node in Nodes:
                    bc_n_normalized = float(
                        Nodes[node]) / float(max_betweenness)
                    _bc_n.append(bc_n_normalized)

                for edge in Edges:
                    bc_e_normalized = float(
                        Edges[edge]) / float(max_betweenness)
                    _bc_e.append(bc_e_normalized)
                result = calc.calcular(_bc_n)
                bc_n.append(result['media'])
                result = calc.calcular(_bc_e)
                bc_e.append(result['media'])


#####################################################################################

        BC_N = calc.calcular_full(bc_n)
        BC_E = calc.calcular_full(bc_e)

        overview = {}

        overview['BetweennessCentrNodes'] = BC_N
        overview['BetweennessCentrEdges'] = BC_E

        with open(str(output_dir) + str(net) + "_net_struct.json", 'w') as f:
            f.write(json.dumps(overview))

        with open(str(output_dir) + str(net) + "_net_struct.txt", 'w') as f:
            f.write(
                "\n######################################################################\n"
            )
            f.write(
                "Betweenness Centr Nodes: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
                % (BC_N['media'], BC_N['variancia'], BC_N['desvio_padrao']))
            f.write(
                "Betweenness Centr Edges: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
                % (BC_E['media'], BC_E['variancia'], BC_E['desvio_padrao']))
            f.write(
                "\n######################################################################\n"
            )
def prepare_communities(community_file, n_nodes):
    i = 0

    communities = {}  # Dictionary mapping a key (community id) to the list of member ids
    alters_set = set()
    size = []  # List with the community sizes
    size_norm = []  # Community sizes normalized by the number of vertices in the ego network
    greater_comm_norm = 0  # Size of the largest community normalized by the vertex set of the graph
    n_singletons = 0  # Number of singletons (communities formed by a single vertex)
    n_non_singletons = 0  # Number of non-singletons
    greater_comm = 0  # Size of the largest community
    smaller_comm = float("inf")  # Size of the smallest community

    for line in community_file:
        i += 1
        key = "com" + str(i)  # Key for the communities dictionary - an identifier such as "com1"
        comm = []  # List to store the members of community i
        a = line.split(' ')
        for item in a:
            if item != "\n":
                comm.append(long(item))
                alters_set.add(long(item))

        if len(comm) > 1:
            n_non_singletons += 1
        elif len(comm) == 1:
            n_singletons += 1

        if len(comm) > greater_comm:  # Size of the largest community
            greater_comm = len(comm)

        if len(comm) < smaller_comm:  # Size of the smallest community
            smaller_comm = len(comm)

        communities[key] = comm  # Store the member ids of this community under the key
        b = float(len(comm)) / float(n_nodes)
        size.append(len(comm))
        size_norm.append(b)

    n_comm = len(communities)  # Number of communities for this ego
    greater_comm_norm = float(greater_comm) / float(n_nodes)

    if n_nodes > len(alters_set):
        alters_ignored = n_nodes - len(
            alters_set
        )  # Number of alters that were ignored during detection and did not receive labels
        alters_ignored_norm = float(alters_ignored) / float(n_nodes)
    else:
        alters_ignored = 0
        alters_ignored_norm = 0

    avg_size = calc.calcular_full(size)  # Sum/average the vector of community sizes...
    avg_size_norm = calc.calcular(size_norm)  # Sum/average the normalized community sizes...

    overlap = float(avg_size['soma']) / float(
        n_nodes
    )  # The overlap: the average number of communities to which each vertex belongs. This is the sum of the sizes of all communities (including singletons) divided by the number of vertices, n.

    return (communities, n_comm, size, avg_size['media'],
            avg_size['desvio_padrao'], size_norm, avg_size_norm['media'],
            overlap, n_singletons, n_non_singletons, alters_ignored,
            alters_ignored_norm, greater_comm, greater_comm_norm, smaller_comm)
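A quick worked check of the overlap value returned above: with n_nodes = 4 and two communities {1, 2, 3} and {3, 4}, the sum of the community sizes is 5, so overlap = 5 / 4 = 1.25; on average each vertex belongs to 1.25 communities.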
# Example 9
def calc_metric(G, metric):
    IsDir = True  # All networks are directed
    n_nodes = G.GetNodes()
    n_edges = G.GetEdges()

    if metric == "nodes":
        result = n_nodes

    elif metric == "edges":
        result = n_edges

    elif metric == "size":
        result = n_nodes + n_edges

    elif metric == "avg_degree":
        result = float(2 * n_edges) / float(n_nodes)

    elif metric == "diameter":
        result = snap.GetBfsFullDiam(G, 100, IsDir)

    elif metric == "density":
        result = float(n_edges) / (float(n_nodes) * (float(n_nodes - 1)))

    elif metric == "closeness_centr":
        Normalized = True
        cc = []
        for NI in G.Nodes():
            cc.append(snap.GetClosenessCentr(
                G, NI.GetId(), Normalized, IsDir))  #get a closeness centrality

        _cc = calc.calcular(cc)
        result = _cc['media']

    elif metric == "betweenness_centr_nodes":
        bc_n = []
        if n_edges == 0 or n_nodes < 3:
            bc_n.append(int(0))
        else:
            Nodes = snap.TIntFltH()
            Edges = snap.TIntPrFltH()
            snap.GetBetweennessCentr(G, Nodes, Edges, 1.0,
                                     IsDir)  # Betweenness centrality of nodes
            if IsDir is True:
                max_betweenness = (n_nodes - 1) * (n_nodes - 2)
            else:
                max_betweenness = ((n_nodes - 1) * (n_nodes - 2)) / 2
            for node in Nodes:
                bc_n_normalized = float(Nodes[node]) / float(max_betweenness)
                bc_n.append(bc_n_normalized)
                bc_n.append(bc_n_normalized)
        _bc_n = calc.calcular(bc_n)
        result = _bc_n['media']

    elif metric == "betweenness_centr_edges":
        bc_e = []
        if n_edges == 0 or n_nodes < 3:
            bc_e.append(int(0))
        else:
            Nodes = snap.TIntFltH()
            Edges = snap.TIntPrFltH()
            snap.GetBetweennessCentr(G, Nodes, Edges, 1.0,
                                     IsDir)  # Betweenness centrality of edges
            if IsDir is True:
                max_betweenness = (n_nodes - 1) * (n_nodes - 2)
            else:
                max_betweenness = ((n_nodes - 1) * (n_nodes - 2)) / 2
            for edge in Edges:
                bc_e_normalized = float(Edges[edge]) / float(max_betweenness)
                bc_e.append(bc_e_normalized)
                bc_e.append(bc_e_normalized)
        _bc_e = calc.calcular(bc_e)
        result = _bc_e['media']

    elif metric == "clust_coef":
        result = snap.GetClustCf(G, -1)

    else:
        result = None
        print("\nUnable to compute " + str(metric))
        print("\n")
        sys.exit()

    return result
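A minimal, hypothetical use of calc_metric on a hand-built snap graph (the three-node directed path below is made up for illustration):

import snap

# Hypothetical three-node directed path: 1 -> 2 -> 3
G = snap.TNGraph.New()
for node_id in (1, 2, 3):
    G.AddNode(node_id)
G.AddEdge(1, 2)
G.AddEdge(2, 3)

print(calc_metric(G, "avg_degree"))  # 2 * 2 / 3 = 1.333...
print(calc_metric(G, "density"))     # 2 / (3 * 2) = 0.333...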
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    print(
        "\n######################################################################\n"
    )
    if os.path.isfile(str(output_dir) + str(net) + "_clustering_coef.json"):
        print("Arquivo já existe: " + str(output_dir) + str(net) +
              "_clustering_coef.json")
    else:

        print("Dataset clustering coefficient - " + str(dataset_dir))

        cf = []  # Média dos coeficientes de clusterings por rede-ego
        gcf = []  # Média usando opção global
        n = []  # vetor com número de vértices para cada rede-ego
        e = []  # vetor com número de arestas para cada rede-ego
        i = 0

        for file in os.listdir(dataset_dir):

            i += 1
            print(
                str(output_dir) + str(net) + "/" + str(file) +
                " - Computing properties for ego " + str(i) + ": " +
                str(file))
            if IsDir is True:
                G = snap.LoadEdgeList(
                    snap.PNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
            else:
                G = snap.LoadEdgeList(
                    snap.PUNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file - may require a separator: snap.LoadEdgeList(snap.PUNGraph, file, 0, 1, '\t')
#			G.Dump()
#			time.sleep(5)

#####################################################################################

            n.append(G.GetNodes())  # Number of vertices
            e.append(G.GetEdges())  # Number of edges
            n_nodes = G.GetNodes()
            n_edges = G.GetEdges()

            #####################################################################################
            # Using the local option - returns the same result as the global one
            if n_edges == 0:
                a = 0
                cf.append(a)
                print("No edges found for ego network " + str(i) +
                      " (" + str(file) + ")")
            else:
                NIdCCfH = snap.TIntFltH()
                snap.GetNodeClustCf(G, NIdCCfH)
                _cf = []
                for item in NIdCCfH:
                    _cf.append(NIdCCfH[item])  # Clustering coefficient
                result = calc.calcular(_cf)
                cf.append(result['media'])
                print("Clustering coef for ego " + str(i) + " (" +
                      str(file) + "): " + str(result['media']))
                print


#####################################################################################
# Using the global option - returns the same result as the local one
#
#			if n_edges == 0:
#				a = 0
#				gcf.append(a)
#			else:
#				GraphClustCoeff = snap.GetClustCf (G)
#				gcf.append(GraphClustCoeff)
#				print "Clustering coefficient: %f" % GraphClustCoeff
#				print

#####################################################################################
        CF = calc.calcular_full(cf)

        overview = {}
        overview['ClusteringCoefficient'] = CF

        with open(str(output_dir) + str(net) + "_clustering_coef.json",
                  'w') as f:
            f.write(json.dumps(overview))

        with open(str(output_dir) + str(net) + "_clustering_coef.txt",
                  'w') as f:
            f.write(
                "\n######################################################################\n"
            )
            f.write(
                "Clustering Coef: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
                % (CF['media'], CF['variancia'], CF['desvio_padrao']))
            f.write(
                "\n######################################################################\n"
            )

        print(
            "\n######################################################################\n"
        )
        print(
            "Clustering Coef: Mean: %5.3f -- Var: %5.3f -- Std Dev: %5.3f \n"
            % (CF['media'], CF['variancia'], CF['desvio_padrao']))
        print(
            "\n######################################################################\n"
        )
def calculate_alg(singletons,net,uw,ud,g_type,alg):
	
	communities = "/home/amaury/communities_hashmap/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/" 
	
	graphs = "/home/amaury/graphs_hashmap_infomap_without_weight/"+str(net)+"/"+str(g_type)+"/" # Todos os grafos serão considerados sem peso...	
	
	
	out_ad = str(output_dir)+"average_degree/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"		
	out_c = str(output_dir)+"conductance/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"	
	out_cut_r = str(output_dir)+"cut_ratio/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_d = str(output_dir)+"density/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_e = str(output_dir)+"expansion/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_normal_cut = str(output_dir)+"normalized_cut/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_s = str(output_dir)+"separability/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	out_s = str(output_dir)+"clustering/"+str(g_type)+"/"+str(alg)+"/"+str(singletons)+"/"+str(net)+"/"
	
	_avg_time = []
	
	if not os.path.exists(communities):
		print ("Directory with the communities not found: "+str(communities)+"\n")

	else:
		print("\n######################################################################")

		for threshold in os.listdir(communities):
			if not os.path.isdir(str(communities)+str(threshold)+"/"):
				print ("Threshold for network "+str(net)+" not found: "+str(threshold))

			else:
				partial_start = time.time()
				create_dirs(out_ad,out_c,out_cut_r,out_d,out_e,out_normal_cut,out_s)
				if not os.path.exists(out_cc):		# create_dirs is not shown here, so create the clustering output directory directly
					os.makedirs(out_cc)

				out_files = [out_ad, out_c, out_cut_r, out_d, out_e, out_normal_cut, out_s, out_cc]
				if all(os.path.exists(str(p)+str(threshold)+".json") for p in out_files):
					print ("Destination file already exists: "+str(threshold)+".json")
					
				else:	
					print("######################################################################")
							
					average_degree = {}
					conductance = {}
					cut_ratio = {}
					density = {}
					expansion = {}
					normalized_cut = {}
					separability = {}
					clustering = {}
					
					i=0 		# Ego counter
					for file in os.listdir(str(communities)+str(threshold)+"/"):
						if os.path.isfile(str(communities)+str(threshold)+"/"+file):
							ego_id = file.split(".txt")
							ego_id = long(ego_id[0])
							i+=1

							if not os.path.isfile(str(graphs)+str(ego_id)+".edge_list"):
								print ("ERROR - EGO: "+str(i)+" - Edge list file not found: "+str(graphs)+str(ego_id)+".edge_list")

							else:
								with open(str(communities)+str(threshold)+"/"+file, 'r') as community_file:
									
									if ud is True and uw is False:											# Undirected and weighted graph (n5,n6,n7,n8,n10)
										G = nx.read_weighted_edgelist(str(graphs)+str(ego_id)+".edge_list")
									elif ud is False and uw is False:										# Directed and weighted graph (n2,n3,n4)
										G = nx.read_weighted_edgelist(str(graphs)+str(ego_id)+".edge_list",create_using=nx.DiGraph())
									elif ud is False and uw is True: 										# Directed and unweighted graph (n1,n9)
										G = nx.read_edgelist(str(graphs)+str(ego_id)+".edge_list",create_using=nx.DiGraph())
									else:
										print ("\nERROR - Unable to build a graph with these uw/ud settings: "+str(uw)+", "+str(ud))
										G = 0
										sys.exit()
										
									print(str(g_type)+" - "+str(alg)+" - "+str(singletons)+" - Network: "+str(net)+" - THRESHOLD: "+str(threshold)+" - ego("+str(i)+"): "+str(file))
									
									communities_dict = prepare_communities(community_file)							# Returns a dictionary with the communities
									
									avg_ad,avg_c,avg_cut_r,avg_d,avg_e,avg_normal_cut,avg_s, avg_cc = metrics_networkx.calc_metrics(communities_dict,G,uw,ud)		# Compute the metrics
									
									average_degree[ego_id] = avg_ad
									conductance[ego_id] = avg_c
									cut_ratio[ego_id] = avg_cut_r
									density[ego_id] = avg_d
									expansion[ego_id] = avg_e
									normalized_cut[ego_id] = avg_normal_cut
									separability[ego_id] = avg_s
									clustering[ego_id] = avg_cc
		 
					print("######################################################################")	

					with open(str(out_ad)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(average_degree))
						
					with open(str(out_c)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(conductance))

					with open(str(out_cut_r)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(cut_ratio))
											
					with open(str(out_d)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(density))

					with open(str(out_e)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(expansion))

					with open(str(out_normal_cut)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(normalized_cut))

					with open(str(out_s)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(separability))
					
					with open(str(out_cc)+str(threshold)+".json", "w") as f:
						f.write(json.dumps(clustering))
				partial_end = time.time()
				partial_time_exec = partial_end - partial_start
				print ("\nTempo de execução para o threshold "+str(threshold)+": "+str(partial_time_exec)+"\n")
				_avg_time.append(partial_time_exec)
	avg_time = calc.calcular(_avg_time)
	print ("\nTempo de médio de execução em cada threshold: "+str(avg_time)+"\n")
	print("\n######################################################################\n")		
# Example 12
def statistics(dataset_dir, output_dir, net, isdir):
    print(
        "\n######################################################################\n"
    )
    print("Dataset statistics - " + str(dataset_dir))
    IsDir = isdir
    n = []  # Média dos nós por rede-ego
    e = []  # Média das arestas por rede-ego
    d = []  # Média dos diametros por rede-ego
    cc = []  # Média dos coeficientes de clusterings por rede-ego
    bc_n = []  # média de betweenness centrality dos nós
    bc_e = []  # média de betweenness centrality das arestas

    i = 0
    for file in os.listdir(dataset_dir):
        i += 1
        print("Calculando propriedades para o ego %d..." % (i))

        G = snap.LoadEdgeList(snap.PNGraph, dataset_dir + file, 0,
                              1)  # load from a text file
        n.append(G.GetNodes())  # Numero de vertices
        e.append(G.GetEdges())  # Numero de arestas
        d.append(snap.GetBfsFullDiam(G, 100, IsDir))  # get diameter of G
        #		cc.append(snap.GetClustCf(G))																		# clustering coefficient of G

        Nodes = snap.TIntFltH()
        Edges = snap.TIntPrFltH()
        snap.GetBetweennessCentr(G, Nodes, Edges, 1.0, IsDir)
        _bc_n = []
        _bc_e = []
        for node in Nodes:
            _bc_n.append(Nodes[node])
        for edge in Edges:
            _bc_e.append(Edges[edge])
        result = calc.calcular(_bc_n)
        bc_n.append(result['media'])
        result = calc.calcular(_bc_e)
        bc_e.append(result['media'])

#####################################################################################
    N = calc.calcular_full(n)
    E = calc.calcular_full(e)
    D = calc.calcular_full(d)
    BC_N = calc.calcular_full(bc_n)
    BC_E = calc.calcular_full(bc_e)
    print(
        "\n######################################################################\n"
    )
    print("NET: %s -- Egos-net: %d" % (net, len(n)))
    print("Nodes: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f" %
          (N['media'], N['variancia'], N['desvio_padrao']))
    print("Edges: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f" %
          (E['media'], E['variancia'], E['desvio_padrao']))
    print("Diameter: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f" %
          (D['media'], D['variancia'], D['desvio_padrao']))
    print(
        "Betweenness Centr Nodes: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f"
        % (BC_N['media'], BC_N['variancia'], BC_N['desvio_padrao']))
    print(
        "Betweenness Centr Edges: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f"
        % (BC_E['media'], BC_E['variancia'], BC_E['desvio_padrao']))
    print(
        "\n######################################################################\n"
    )
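# A minimal usage sketch (not part of the original example). The directory
# layout is an assumption: dataset_dir holds one edge-list file per ego
# network, and the paths below are hypothetical.
#
# statistics("/data/egonets/", "/data/output/", "example_net", isdir=False)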
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    print(
        "\n######################################################################\n"
    )
    if os.path.isfile(str(output_dir) + str(net) + "_net_struct.json"):
        print("File already exists: " + str(output_dir) + str(net) +
              "_net_struct.json")
    else:

        print("Dataset network structure - " + str(dataset_dir))
        n = []  # Node count of each ego network
        e = []  # Edge count of each ego network
        nodes = {}  # dict: ego_id -> node count
        edges = {}  # dict: ego_id -> edge count
        d = []  # Diameter of each ego network
        cc = []  # Average closeness centrality of each ego network
        bc_n = []  # Average node betweenness centrality of each ego network
        bc_e = []  # Average edge betweenness centrality of each ego network
        degree = {
        }  # dict: "node degree -> number of nodes with that degree", accumulated over all ego networks
        i = 0

        for file in os.listdir(dataset_dir):
            # The ego id is the numeric part of the file name ("<id>.edge_list")
            ego_id = file.split(".edge_list")
            ego_id = long(ego_id[0])
            i += 1
            print(
                str(output_dir) + str(net) +
                " - Computing properties for ego " + str(i) + ": " +
                str(file))
            if IsDir is True:
                G = snap.LoadEdgeList(
                    snap.PNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file; may require an explicit separator, e.g. snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
            else:
                G = snap.LoadEdgeList(
                    snap.PUNGraph, dataset_dir + file, 0, 1
                )  # Load from a text file; may require an explicit separator, e.g. snap.LoadEdgeList(snap.PUNGraph, file, 0, 1, '\t')

#####################################################################################
            n_nodes = G.GetNodes()
            n_edges = G.GetEdges()
            nodes[ego_id] = n_nodes  # dict: ego_id -> node count
            edges[ego_id] = n_edges  # dict: ego_id -> edge count
            n.append(n_nodes)  # Number of nodes
            e.append(n_edges)  # Number of edges

            #####################################################################################
            if n_edges == 0:
                # Empty ego network: diameter and closeness default to zero.
                # (Betweenness is handled by the guard below; the original also
                # appended to bc_n/bc_e here, producing two entries per ego.)
                d.append(0)
                cc.append(0)
            else:
                d.append(snap.GetBfsFullDiam(G, 100,
                                             IsDir))  # Diameter of G

                #####################################################################################

                # Average normalized closeness centrality over all nodes
                _cc = []
                Normalized = True
                for NI in G.Nodes():
                    _cc.append(
                        snap.GetClosenessCentr(G, NI.GetId(), Normalized,
                                               IsDir))
                result = calc.calcular(_cc)
                cc.append(result['media'])

#####################################################################################

            if n_edges == 0 or n_nodes < 3:
                # The normalization denominator (n-1)(n-2) is zero for fewer
                # than three nodes, so betweenness is defined as zero here
                # (the original appended n_edges instead).
                bc_n.append(0)
                bc_e.append(0)
            else:
                Nodes = snap.TIntFltH()
                Edges = snap.TIntPrFltH()
                snap.GetBetweennessCentr(
                    G, Nodes, Edges, 1.0,
                    IsDir)  # Exact betweenness centrality for nodes and edges
                _bc_n = []
                _bc_e = []
                # Maximum possible betweenness, used for normalization:
                # (n-1)(n-2) ordered pairs for directed graphs, half that
                # for undirected graphs
                if IsDir is True:
                    max_betweenness = (n_nodes - 1) * (n_nodes - 2)
                else:
                    max_betweenness = ((n_nodes - 1) * (n_nodes - 2)) / 2

                for node in Nodes:
                    _bc_n.append(float(Nodes[node]) / float(max_betweenness))

                for edge in Edges:
                    _bc_e.append(float(Edges[edge]) / float(max_betweenness))
                result = calc.calcular(_bc_n)
                bc_n.append(result['media'])
                result = calc.calcular(_bc_e)
                bc_e.append(result['media'])

                #####################################################################################

                DegToCntV = snap.TIntPrV()
                snap.GetDegCnt(
                    G, DegToCntV)  # Degree distribution of this ego network
                # Accumulate "degree -> number of nodes with that degree"
                # across all ego networks
                for item in DegToCntV:
                    k = item.GetVal1()
                    v = item.GetVal2()
                    if k in degree:
                        degree[k] = degree[k] + v
                    else:
                        degree[k] = v

#####################################################################################

            print("%s %s %s %s %s %s" %
                  (n[i - 1], e[i - 1], d[i - 1], cc[i - 1], bc_n[i - 1],
                   bc_e[i - 1]))
            print("")
#####################################################################################

        N = calc.calcular_full(n)
        E = calc.calcular_full(e)

        histogram.histogram(degree, output_dir + "histogram" + "/", N['soma'],
                            net)

        D = calc.calcular_full(d)

        CC = calc.calcular_full(cc)

        BC_N = calc.calcular_full(bc_n)
        BC_E = calc.calcular_full(bc_e)

        overview = {}
        overview['Nodes'] = N
        overview['Edges'] = E
        overview['Diameter'] = D
        overview['CloseCentr'] = CC
        overview['BetweennessCentrNodes'] = BC_N
        overview['BetweennessCentrEdges'] = BC_E

        # N and E already hold these statistics; reuse them instead of recomputing
        nodes_stats = N
        edges_stats = E
        overview_basics = {
            'nodes': n,
            'nodes_stats': nodes_stats,
            'edges': e,
            'edges_stats': edges_stats
        }

        output_basics = output_dir + "/" + str(net) + "/"
        if not os.path.exists(output_basics):
            os.makedirs(output_basics)

        with open(str(output_basics) + str(net) + "_nodes.json", 'w') as f:
            f.write(json.dumps(nodes))
        with open(str(output_basics) + str(net) + "_edges.json", 'w') as f:
            f.write(json.dumps(edges))

        with open(str(output_basics) + str(net) + "_overview.json", 'w') as f:
            f.write(json.dumps(overview_basics))

        with open(str(output_dir) + str(net) + "_net_struct.json", 'w') as f:
            f.write(json.dumps(overview))

        with open(str(output_dir) + str(net) + "_net_struct.txt", 'w') as f:
            f.write(
                "\n######################################################################\n"
            )
            f.write("NET: %s -- Ego-nets: %d \n" % (net, len(n)))
            f.write(
                "Nodes: Mean: %5.3f -- Var: %5.3f -- Std. Dev.: %5.3f \n" %
                (N['media'], N['variancia'], N['desvio_padrao']))
            f.write(
                "Edges: Mean: %5.3f -- Var: %5.3f -- Std. Dev.: %5.3f \n" %
                (E['media'], E['variancia'], E['desvio_padrao']))
            f.write(
                "Diameter: Mean: %5.3f -- Var: %5.3f -- Std. Dev.: %5.3f \n"
                % (D['media'], D['variancia'], D['desvio_padrao']))
            f.write(
                "CloseCentr: Mean: %5.3f -- Var: %5.3f -- Std. Dev.: %5.3f \n"
                % (CC['media'], CC['variancia'], CC['desvio_padrao']))
            f.write(
                "Betweenness Centr Nodes: Mean: %5.3f -- Var: %5.3f -- Std. Dev.: %5.3f \n"
                % (BC_N['media'], BC_N['variancia'], BC_N['desvio_padrao']))
            f.write(
                "Betweenness Centr Edges: Mean: %5.3f -- Var: %5.3f -- Std. Dev.: %5.3f \n"
                % (BC_E['media'], BC_E['variancia'], BC_E['desvio_padrao']))
            f.write(
                "\n######################################################################\n"
            )
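# The calc module is not shown in this example. Below is a minimal sketch of
# what it presumably provides, inferred from the keys used above ('soma',
# 'media', 'variancia', 'desvio_padrao'); this is an assumption, not the
# original implementation.
import math

def calcular(values):
    # Sum and mean of a list of numbers
    total = float(sum(values))
    media = total / len(values) if values else 0.0
    return {'soma': total, 'media': media}

def calcular_full(values):
    # Sum, mean, population variance, and standard deviation
    stats = calcular(values)
    variancia = (sum((v - stats['media']) ** 2 for v in values) /
                 len(values)) if values else 0.0
    stats['variancia'] = variancia
    stats['desvio_padrao'] = math.sqrt(variancia)
    return stats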