# Relies on module-level imports (expected at the top of this file):
#   import os
#   import numpy as np
#   import networkx as nx
#   import matplotlib.pyplot as plt
#   from scipy import optimize
#   import texthelper

def id2file(self, nodefn=None, edgefn=None):
    # Write the node and edge id maps to disk, one "name id" pair per line,
    # sorted by id. When no file names are given, default to entity2id.txt /
    # relation2id.txt in the directory of self.file.
    if nodefn is None:
        nodefn = os.path.join(os.path.dirname(self.file), 'entity2id.txt')
    if edgefn is None:
        edgefn = os.path.join(os.path.dirname(self.file), 'relation2id.txt')
    with open(nodefn, 'w') as f:
        for name, idx in texthelper.sortDict(self.node2id, By="value"):
            f.write(str(name) + ' ' + str(idx) + '\n')
    with open(edgefn, 'w') as f:
        for name, idx in texthelper.sortDict(self.edge2id, By="value"):
            f.write(str(name) + ' ' + str(idx) + '\n')
def write2file(self):
    # Same output as id2file() with the default entity2id.txt /
    # relation2id.txt paths.
    self.id2file()
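# texthelper.sortDict is used throughout but defined in a helper module
# outside this section. The sketch below is the contract these calls
# assume -- sort a dict's items by key or by value and return a list of
# (key, value) tuples; the real texthelper implementation may differ.
def sortDict(d, By="key", reverse=False):
    idx = 0 if By == "key" else 1
    return sorted(d.items(), key=lambda kv: kv[idx], reverse=reverse)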
def zipf_coeffi(self, plot=True):
    # Compute C(k): the clustering coefficient averaged over all nodes of
    # degree k, then optionally fit a power law to the C(k)-vs-k curve and
    # save linear and log-log plots.
    degree = {}  # degree k -> list of nodes with that degree
    for i in self.uG.nodes():
        degree.setdefault(self.uG.degree(i), []).append(i)
    clustering = nx.clustering(self.uG)  # one pass instead of a call per node
    ck = {}
    for k, nodes in degree.items():
        ck[k] = sum(clustering[n] for n in nodes) / len(nodes)
    ck = np.array(texthelper.sortDict(ck, By="key"))
    if not plot:
        return ck
    xdata, ydata = ck[:, 0], ck[:, 1]
    # curve_fit returns (optimal parameters, covariance); only the
    # parameters are used for the annotation.
    fita, fitb = optimize.curve_fit(powerLaw, xdata, ydata)
    label = 'y=' + "{:.2f}".format(fita[1]) + '*x^-' + "{:.2f}".format(fita[0])
    plt.figure()
    plt.title("Degree-Clustering distribution curve fitting\n")
    plt.text(max(xdata) * 0.4, max(ydata) * 0.4, label, ha='center')
    plt.plot(xdata, ydata, '.')
    plt.xlabel('k')
    plt.ylabel('clustering')
    plt.savefig(self.logging + '/zipf_coeffi.png')
    plt.close()
    plt.figure()
    plt.title("Degree-Clustering distribution curve fitting (log)\n")
    plt.text(max(xdata) * 0.4, max(ydata) * 0.4, label, ha='center')
    plt.loglog(xdata, ydata, '.')
    plt.xlabel('log(k)')
    plt.ylabel('log(clustering)')
    plt.savefig(self.logging + '/zipf_coeffi_log.png')
    plt.close()
    return ck
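# powerLaw is the model passed to optimize.curve_fit above but is not
# defined in this section. The two-parameter form below is an assumption
# inferred from the annotation string 'y=<b>*x^-<a>'; the project's real
# definition may differ.
def powerLaw(x, a, b):
    # y = b * x^(-a): 'a' is the decay exponent, 'b' the scale factor.
    return b * np.power(x, -a)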
def cohesive(self, windowSize):
    # Grow a connected "window" of windowSize nodes around a random center,
    # always absorbing the highest-eigenvector-centrality nodes on the
    # current search frontier first.
    all_bs = nx.eigenvector_centrality(self.uG)
    searched = set()
    margin = set()
    windows = set()
    center = list(self.nodes)[np.random.randint(len(self.nodes))]
    windows.add(center)
    margin.add(center)
    searched.add(center)
    while len(windows) < windowSize:
        self._update_margin(searched, margin)
        if not margin:  # component exhausted before the window filled up
            break
        margin_bs = {i: all_bs[i] for i in margin}
        margin_bs_sort = texthelper.sortDict(margin_bs, By="value", reverse=True)
        for node, _score in margin_bs_sort:
            windows.add(node)
            if len(windows) >= windowSize:
                break
    return windows
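# _update_margin is called by cohesive() and Cohesive() but is defined
# elsewhere in the project. The sketch below captures the behavior the
# loops above and below rely on -- advance the search frontier by one
# hop, mutating both sets in place; the actual implementation may differ.
def _update_margin(self, searched, margin):
    new_margin = set()
    for node in margin:
        for nb in self.uG.neighbors(node):
            if nb not in searched:
                new_margin.add(nb)
                searched.add(nb)
    margin.clear()  # the new one-hop frontier replaces the old one
    margin.update(new_margin)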
def Cohesive(self, record=True):
    # Split the graph into two overlapping halves: a cohesive core of
    # share_node_size high-centrality nodes is shared by both halves, and
    # the remaining nodes are divided randomly and evenly between them.
    self.root = self._root + '/cohesive'
    if not os.path.isdir(self.root):
        os.mkdir(self.root)
    node_size = len(self.nodes)
    share_node_size = int(node_size * self.overlapRate)
    # Degree centrality scores the nodes; betweenness or eigenvector
    # centrality are drop-in alternatives:
    # all_bs = nx.betweenness_centrality(self.G)
    # all_bs = nx.eigenvector_centrality(self.uG)
    all_bs = nx.degree_centrality(self.uG)
    all_bs_sort = texthelper.sortDict(all_bs, By="value", reverse=True)
    print(all_bs_sort[0:10])
    searched = set()
    margin = set()
    self.share_nodes = set()
    center = all_bs_sort[0][0]  # start from the most central node
    self.share_nodes.add(center)
    margin.add(center)
    searched.add(center)
    while len(self.share_nodes) < share_node_size:
        self._update_margin(searched, margin)
        if not margin:  # component exhausted before the core filled up
            break
        margin_bs = {i: all_bs[i] for i in margin}
        margin_bs_sort = texthelper.sortDict(margin_bs, By="value", reverse=True)
        for node, _score in margin_bs_sort:
            self.share_nodes.add(node)
            if len(self.share_nodes) >= share_node_size:
                break
    # Distribute the remaining nodes between the two halves; shuffle first
    # so the alternating pops below yield a random split.
    self.subG1_nodes, self.subG2_nodes = [], []
    residue_nodes = list(set(self.nodes) - self.share_nodes)
    np.random.shuffle(residue_nodes)
    for i in range(len(residue_nodes) // 2):
        self.subG1_nodes.append(residue_nodes.pop())
        self.subG2_nodes.append(residue_nodes.pop())
    while residue_nodes:  # an odd leftover node goes to a random half
        if np.random.rand() <= 0.5:
            self.subG1_nodes.append(residue_nodes.pop())
        else:
            self.subG2_nodes.append(residue_nodes.pop())
    if record:
        print('cohesive _record begin -----')
        self._record(statistic=False)
        print('cohesive _record over +++++')
        print('cohesive dataset begin -----')
        self.dataset()
        print('cohesive dataset over +++++')
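# Hypothetical usage of the overlapping split (the class name, its
# constructor, and the overlapRate attribute shown here are assumptions;
# they are defined outside this section):
#
#   g = KnowledgeGraph('data/train.txt')  # builds self.uG, self.nodes, ...
#   g.overlapRate = 0.1                   # share ~10% of the nodes
#   g.Cohesive(record=False)
#   print(len(g.share_nodes), len(g.subG1_nodes), len(g.subG2_nodes))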