def load_synthetic(mu, n=1000, tau1=3, tau2=1.5, edge_drop_percent=0.2):
    """Build an LFR benchmark graph and hide a random fraction of its edges.

    Every node receives an integer ``'value'`` attribute identifying its
    ground-truth community. Labels are consecutive integers starting at 0,
    assigned in order of first appearance while iterating over the nodes.

    Args:
        mu: Mixing parameter forwarded to ``LFR_benchmark_graph``.
        n: Number of nodes.
        tau1: Power-law exponent of the degree distribution.
        tau2: Power-law exponent of the community-size distribution.
        edge_drop_percent: Fraction of the edges (after self-loop removal)
            to remove from the returned graph.

    Returns:
        A tuple ``(G, RandomSample)``: the undirected graph with the sampled
        edges removed, and the list of removed edges.
    """
    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
                            min_community=30, seed=10)

    # Relabel communities as 0, 1, 2, ... in order of first appearance.
    # Each community is identified by the first member of its node set.
    label_of = {}
    for node in G.nodes:
        representative = next(iter(G.nodes[node]['community']))
        G.nodes[node]['value'] = label_of.setdefault(representative,
                                                     len(label_of))

    # Remove self edges. Graph.selfloop_edges() was removed in networkx 2.4;
    # the module-level function is the supported spelling. The list() copy
    # avoids mutating the graph while iterating over it.
    G.remove_edges_from(list(nx.selfloop_edges(G)))

    # Convert the graph to a plain undirected Graph.
    G = nx.Graph(G)

    ListOfEdges = list(G.edges)
    sample = int(len(ListOfEdges) * edge_drop_percent)
    RandomSample = random.sample(ListOfEdges, sample)
    G.remove_edges_from(RandomSample)

    return (G, RandomSample)
def get_lfr_network_data(n, tau1, tau2, mu):
    """Create an LFR graph and split off positive/negative test links.

    A random 20% of the graph's edges are chosen as positive test links.
    An equal number of negative links is requested from ``sample_neg``
    while the graph still contains every edge; only afterwards are the
    positive test links removed from the graph.

    Args:
        n: Number of nodes.
        tau1: Power-law exponent of the degree distribution.
        tau2: Power-law exponent of the community-size distribution.
        mu: Mixing parameter forwarded to ``LFR_benchmark_graph``.

    Returns:
        A tuple ``(graph, pos_test_links, neg_test_links)``.
    """
    graph = LFR_benchmark_graph(
        n, tau1, tau2, mu, average_degree=5, min_community=30, seed=10
    )
    edge_pool = list(graph.edges)

    # Shuffle edge positions in place and keep the leading 20% as the test set.
    order = np.arange(len(edge_pool))
    np.random.shuffle(order)
    held_out = order[:int(0.2 * len(edge_pool))]

    pos_test_links = []
    for position in held_out:
        pos_test_links.append(edge_pool[position])

    # Negatives are sampled before the positives are removed from the graph.
    neg_test_links = sample_neg(graph, num_neg_links=len(held_out))

    graph.remove_edges_from(pos_test_links)
    return graph, pos_test_links, neg_test_links
def LFR(n, tau1, tau2, mu):
    """Generate an LFR benchmark graph and export it to text/CSV files.

    The graph is created with ``average_degree=10``, ``max_degree=50``,
    ``min_community=10`` and ``max_community=50``; self loops are removed
    before anything is written.

    Files written (all inside ``experiments/datasets/lfr``):
        * ``lfrAdjlistN<n>MU<mu>*.txt``  - adjacency list (networkx format)
        * ``lfrEdgelistN<n>MU<mu>*.txt`` - one ``source target`` pair per line
        * ``lfrCommN<n>MU<mu>*.txt``     - one community per line, members
          separated by single spaces
        * ``lfrEdgelistN<n>MU<mu>*.csv`` - ``source;target;1`` rows, where
          the trailing 1 is the edge weight

    Args:
        n: (int) Number of nodes, e.g. 1000.
        tau1: (float) Power-law exponent for the degree distribution.
            Must be strictly greater than one.
        tau2: (float) Power-law exponent for the community size
            distribution. Must be strictly greater than one.
        mu: (float) Fraction of inter-community edges incident to each
            node, in the interval [0, 1]. A greater mu yields less clearly
            separated communities.

    Returns:
        The name of the CSV edge-list file, relative to the working
        directory this function switches to.

    Notes:
        * The process working directory is changed to
          ``experiments/datasets/lfr`` and is NOT restored. The returned
          file name is relative to that directory, and because the path is
          relative a second call from the new directory will fail unless
          the caller changes back first.
        * The ``*`` in the file names is a literal character, not a
          wildcard; it is kept for compatibility with existing consumers.
    """
    os.chdir('experiments/datasets/lfr')

    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=10,
                            max_degree=50, min_community=10,
                            max_community=50)

    # Remove self loops. Graph.selfloop_edges() was removed in networkx 2.4;
    # the list() copy avoids mutating the graph while iterating over it.
    G.remove_edges_from(list(nx.selfloop_edges(G)))

    numberOfEdges = G.number_of_edges()
    print("Number of edges of graph G: ", numberOfEdges)
    print("------------------------------")

    communities = {frozenset(G.nodes[v]['community']) for v in G}

    suffix = 'N' + str(n) + 'MU' + str(mu) + '*'
    adjacency_list_filename = 'lfrAdjlist' + suffix + '.txt'
    edge_list_filename = 'lfrEdgelist' + suffix + '.txt'
    community_list_filename = 'lfrComm' + suffix + '.txt'
    edge_csv_filename = 'lfrEdgelist' + suffix + '.csv'

    # Written once, via the path, so networkx opens and closes the file.
    nx.write_adjlist(G, adjacency_list_filename)

    # For an undirected graph G.edges() reports each edge once, in the same
    # order as the adjacency list, so the file need not be read back.
    edges = list(G.edges())

    with open(edge_list_filename, 'w') as f:
        f.write('%s\n' % '\n'.join('%s %s' % (u, v) for u, v in edges))

    # One community per line, members separated by single spaces.
    with open(community_list_filename, 'w') as f:
        for members in communities:
            f.write('%s\n' % ' '.join(str(v) for v in members))

    # CSV edge list with a constant weight of 1 appended to every edge.
    # newline='' lets the csv module control line endings itself.
    with open(edge_csv_filename, 'w', newline='') as out_file:
        writer = csv.writer(out_file, delimiter=';')
        writer.writerows([u, v, 1] for u, v in edges)

    return edge_csv_filename