예제 #1
0
파일: datahelper.py 프로젝트: iszhaoxin/KGI
 def id2file(self, nodefn, edgefn):
     """Dump the node and edge id tables to two text files.

     Every output line has the form ``<name> <id>``. Entries are written
     in the order returned by ``texthelper.sortDict(..., By="value")``.

     Args:
         nodefn: Path of the file that receives ``self.node2id``.
         edgefn: Path of the file that receives ``self.edge2id``.
     """
     with open(nodefn, 'w') as node_out:
         ordered_nodes = texthelper.sortDict(self.node2id, By="value")
         node_out.writelines(
             entry[0] + ' ' + str(entry[1]) + '\n'
             for entry in ordered_nodes)

     with open(edgefn, 'w') as edge_out:
         ordered_edges = texthelper.sortDict(self.edge2id, By="value")
         edge_out.writelines(
             entry[0] + ' ' + str(entry[1]) + '\n'
             for entry in ordered_edges)
예제 #2
0
 def id2file(self):
     """Write ``entity2id.txt`` and ``relation2id.txt`` next to ``self.file``.

     Every output line has the form ``<name> <id>``. Entries are written
     in the order returned by ``texthelper.sortDict(..., By="value")``.
     ``self.node2id`` goes to ``entity2id.txt`` and ``self.edge2id`` goes
     to ``relation2id.txt``.
     """
     # os.path.join instead of `dirname + '/name'`: when self.file has no
     # directory component, dirname() is '' and plain concatenation would
     # yield '/entity2id.txt', i.e. a file in the filesystem root.
     out_dir = os.path.dirname(self.file)
     outputs = (
         ('entity2id.txt', self.node2id),
         ('relation2id.txt', self.edge2id),
     )
     for filename, table in outputs:
         with open(os.path.join(out_dir, filename), 'w') as f:
             for entry in texthelper.sortDict(table, By="value"):
                 # str() on the key too, so non-string names do not raise
                 # TypeError during concatenation.
                 f.write(str(entry[0]) + ' ' + str(entry[1]) + '\n')
예제 #3
0
파일: datahelper.py 프로젝트: iszhaoxin/KGI
 def write2flie(self):
     """Write ``entity2id.txt`` and ``relation2id.txt`` next to ``self.file``.

     Every output line has the form ``<name> <id>``. Entries are written
     in the order returned by ``texthelper.sortDict(..., By="value")``.
     ``self.node2id`` goes to ``entity2id.txt`` and ``self.edge2id`` goes
     to ``relation2id.txt``.

     NOTE: the method name is misspelled ("flie") but is kept unchanged
     because callers outside this block may depend on it.
     """
     # os.path.join instead of `dirname + '/name'`: when self.file has no
     # directory component, dirname() is '' and plain concatenation would
     # yield '/entity2id.txt', i.e. a file in the filesystem root.
     out_dir = os.path.dirname(self.file)

     with open(os.path.join(out_dir, 'entity2id.txt'), 'w') as f:
         for entry in texthelper.sortDict(self.node2id, By="value"):
             f.write(str(entry[0]) + ' ' + str(entry[1]) + '\n')

     with open(os.path.join(out_dir, 'relation2id.txt'), 'w') as f:
         for entry in texthelper.sortDict(self.edge2id, By="value"):
             f.write(str(entry[0]) + ' ' + str(entry[1]) + '\n')
예제 #4
0
파일: graph.py 프로젝트: iszhaoxin/KGI
    def zipf_coeffi(self, plot=True):
        """Average the clustering coefficient of ``self.uG`` per node degree.

        Nodes are grouped by ``self.uG.degree(node)`` and, for every group,
        ``nx.clustering(self.uG, node)`` is averaged over its members.

        Args:
            plot: When truthy, additionally fit ``powerLaw`` to the data
                with ``optimize.curve_fit`` and save two scatter plots
                (linear axes and log-log axes) as ``zipf_coeffi.png`` and
                ``zipf_coeffi_log.png`` under ``self.logging``.

        Returns:
            ``np.array`` built from
            ``texthelper.sortDict(<degree -> mean clustering>, By="key")``.
            The plotting code reads column 0 as the degree and column 1 as
            the mean clustering coefficient.
        """
        # Group nodes by degree, querying the degree once per node.
        nodes_by_degree = {}
        for node in self.uG.nodes():
            nodes_by_degree.setdefault(self.uG.degree(node), []).append(node)

        mean_clustering = {
            k: sum(nx.clustering(self.uG, node) for node in members)
            / len(members)
            for k, members in nodes_by_degree.items()
        }

        zipf_coeffi = np.array(texthelper.sortDict(mean_clustering, By="key"))
        if not plot:
            return zipf_coeffi

        xdata = zipf_coeffi[:, 0]
        ydata = zipf_coeffi[:, 1]
        # curve_fit returns (optimal parameters, covariance); only the
        # parameters are used for the annotation.
        fita, _ = optimize.curve_fit(powerLaw, xdata, ydata)
        equation = ('y=' + "{:.2f}".format(fita[1]) + '*x^-' +
                    "{:.2f}".format(fita[0]))

        # (title, drawing function, x label, y label, output file suffix)
        charts = (
            ("Degree-Clustering distribution curve fitting\n",
             plt.plot, 'k', 'clustering', '/zipf_coeffi.png'),
            ("Degree-Clustering distribution curve fitting (log)\n",
             plt.loglog, 'log(k)', 'log(clustering)',
             '/zipf_coeffi_log.png'),
        )
        for title, draw, xlabel, ylabel, suffix in charts:
            fig = plt.figure()
            try:
                plt.title(title)
                plt.text(max(xdata) * 0.4,
                         max(ydata) * 0.4,
                         equation,
                         ha='center')
                draw(xdata, ydata, '.')
                plt.xlabel(xlabel)
                plt.ylabel(ylabel)
                plt.savefig(self.logging + suffix)
            finally:
                # Close the figure that was just created. plt.close(0)
                # would target the figure *numbered* 0, which plt.figure()
                # does not create by default, so the figures would pile up.
                plt.close(fig)
        return zipf_coeffi
예제 #5
0
    def cohesive(self, windowSize):
        """Grow a window of nodes outward from a randomly chosen seed.

        Starting from a random seed, the frontier (``margin``) is expanded
        with ``self._update_margin`` and its nodes are added to the window
        in descending order of eigenvector centrality until the window
        holds ``windowSize`` nodes.

        Args:
            windowSize: Target number of nodes in the returned window.

        Returns:
            A ``set`` of nodes. It always contains the seed, and it may be
            smaller than ``windowSize`` if the expansion stops producing
            new nodes.
        """
        all_bs = nx.eigenvector_centrality(self.uG)

        # np.random.randint excludes the upper bound, so the bound must be
        # len(self.nodes) for every index 0..len-1 to be drawable. The old
        # `len(self.nodes) - 1` could never pick the last one and raised
        # for a single-node graph.
        # NOTE(review): the seed is an integer index that is used directly
        # as a node key in `all_bs`; this presumes nodes are the integers
        # 0..len(self.nodes)-1 -- confirm against the graph construction.
        center = np.random.randint(0, len(self.nodes))
        searched = {center}
        margin = {center}
        windows = {center}

        while len(windows) < windowSize:
            size_before = len(windows)
            self._update_margin(searched, margin)
            margin_bs = {node: all_bs[node] for node in margin}
            ranked = texthelper.sortDict(margin_bs, By="value", reverse=True)
            for entry in ranked:
                windows.add(entry[0])
                if len(windows) >= windowSize:
                    break
            if len(windows) == size_before:
                # No new node was reachable in this round; stop instead of
                # looping forever.
                # NOTE(review): assumes _update_margin cannot yield new
                # nodes after a round that produced none -- confirm.
                break
        return windows
예제 #6
0
    def Cohesive(self, record=True):
        """Split the nodes into a shared core plus two random halves.

        A shared node set is grown from the node with the highest degree
        centrality: the frontier is expanded with ``self._update_margin``
        and frontier nodes are added in descending centrality order until
        ``int(len(self.nodes) * self.overlapRate)`` nodes are collected.
        The remaining nodes are shuffled and divided between
        ``self.subG1_nodes`` and ``self.subG2_nodes``.

        Side effects: sets ``self.root`` (creating the directory), and
        assigns ``self.share_nodes``, ``self.subG1_nodes`` and
        ``self.subG2_nodes``.

        Args:
            record: When truthy, call ``self._record(statistic=False)`` and
                ``self.dataset()`` after the split.
        """
        self.root = self._root + '/cohesive'
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(self.root, exist_ok=True)

        share_node_size = int(len(self.nodes) * self.overlapRate)

        all_bs = nx.degree_centrality(self.uG)
        all_bs_sort = texthelper.sortDict(all_bs, By="value", reverse=True)
        print(all_bs_sort[0:10])
        center = all_bs_sort[0][0]

        searched = {center}
        margin = {center}
        self.share_nodes = {center}

        while len(self.share_nodes) < share_node_size:
            print('++')
            size_before = len(self.share_nodes)
            self._update_margin(searched, margin)
            margin_bs = {node: all_bs[node] for node in margin}
            margin_bs_sort = texthelper.sortDict(margin_bs,
                                                 By="value",
                                                 reverse=True)
            for entry in margin_bs_sort:
                self.share_nodes.add(entry[0])
                if len(self.share_nodes) >= share_node_size:
                    break
            if len(self.share_nodes) == size_before:
                # No new node was reachable in this round; stop instead of
                # looping forever.
                # NOTE(review): assumes _update_margin cannot yield new
                # nodes after a round that produced none -- confirm.
                break

        # Shuffle the leftover nodes and split the *shuffled array* itself.
        # Converting it back to a set (as before) discarded the shuffle and
        # made the split follow set iteration order rather than the RNG.
        # Going through np.array keeps the element types the lists had
        # before (numpy scalars).
        residue_nodes = np.array(list(set(self.nodes) - self.share_nodes))
        np.random.shuffle(residue_nodes)
        half = len(residue_nodes) // 2
        self.subG1_nodes = list(residue_nodes[:half])
        self.subG2_nodes = list(residue_nodes[half:2 * half])
        # With an odd count one node is left over; assign it by coin flip.
        for leftover in residue_nodes[2 * half:]:
            if np.random.rand() <= 0.5:
                self.subG1_nodes.append(leftover)
            else:
                self.subG2_nodes.append(leftover)

        if record:
            print('cohesive _record begin -----')
            self._record(statistic=False)
            print('cohesive _record over +++++')

            print('cohesive dataset begin -----')
            self.dataset()
            print('cohesive dataset over +++++')