def edgeEva(self, edge, adj, modelnum=20, edgenum=20):
    """Count the rounds in which adding ``edge`` does not lower the model score.

    Each round works on a fresh deep copy of ``adj``: ``edgenum`` random
    symmetric entries are set to 1, a ``gemodel_GCN`` is trained and scored,
    then ``edge`` is set as well and a second model is trained and scored on
    the same matrix.

    Args:
        edge: Indexable pair ``(u, v)`` of node indices for the edge under test.
        adj: Adjacency matrix exposing ``shape`` and ``[row, col]`` assignment.
        modelnum: Number of rounds to run.
        edgenum: Number of random entries added per round (endpoints are drawn
            independently, so duplicates and self-loops are possible).

    Returns:
        ``(wins, (edge[0], edge[1]))`` where ``wins`` is the number of rounds
        whose score with ``edge`` was greater than or equal to the score
        without it.
    """
    node_count = adj.shape[0]
    wins = 0
    for _round in range(modelnum):
        noisy = copy.deepcopy(adj)
        for _ in range(edgenum):
            u = random.randint(0, node_count - 1)
            v = random.randint(0, node_count - 1)
            noisy[u, v] = 1
            noisy[v, u] = 1

        # Baseline: random noise only.
        baseline_model = gemodel_GCN(
            noisy, self.features, self.labels,
            split_t=(self.split_train, self.split_val, self.split_unlabeled))
        baseline_model.train()
        baseline = baseline_model.performance()

        # Same noisy graph with the candidate edge added on top.
        noisy[edge[0], edge[1]] = 1
        noisy[edge[1], edge[0]] = 1
        candidate_model = gemodel_GCN(
            noisy, self.features, self.labels,
            split_t=(self.split_train, self.split_val, self.split_unlabeled))
        candidate_model.train()
        with_edge = candidate_model.performance()

        if with_edge >= baseline:
            wins += 1

    print('edge: {} evaluated {} better score'.format(edge, wins))
    return (wins, (edge[0], edge[1]))
def edgeEvaNew(self, candidates, adj):
    """Score candidate edges by training models on random subsets of them.

    For every round a deep copy of ``adj`` receives ``self.evalEdgeNum``
    edges drawn (with replacement) from ``candidates``; a ``gemodel_GCN`` is
    trained on it and its ``acu()`` value is recorded against every edge
    drawn in that round. An edge drawn twice in one round is recorded twice.

    The number of rounds is
    ``int(2 * len(candidates) * self.edgeevaltimes / self.poolnum / self.evalEdgeNum) + 1``.

    Args:
        candidates: Sequence of ``(a, b)`` node-index pairs.
        adj: Adjacency matrix supporting ``[row, col]`` assignment.

    Returns:
        ``collections.defaultdict(list)`` mapping each drawn ``(a, b)`` to the
        list of scores of the rounds it took part in. Empty when
        ``candidates`` is empty. If an ordinary exception occurs, it is
        reported on stdout and whatever was collected so far is returned.
    """
    res = collections.defaultdict(list)
    try:
        lencan = len(candidates)
        if lencan == 0:
            # Nothing to sample from; avoid the confusing "empty range" error
            # that random.randint would raise below.
            return res
        modelnum = int(2 * lencan * self.edgeevaltimes / self.poolnum / self.evalEdgeNum) + 1
        for _ in range(modelnum):
            addededge = []
            tempadj = copy.deepcopy(adj)
            for _ in range(self.evalEdgeNum):
                a, b = candidates[random.randint(0, lencan - 1)]
                tempadj[a, b] = 1
                tempadj[b, a] = 1
                addededge.append((a, b))
            g = gemodel_GCN(tempadj, self.features, self.labels, split_t=self.split_t)
            g.train()
            score = g.acu()
            for e in addededge:
                res[e].append(score)
    except Exception as err:
        # Best-effort: report and return partial results. Only Exception is
        # caught so KeyboardInterrupt / SystemExit still stop the process.
        print('raised exception edgeEvaNew: {}'.format(err))
    return res
def edgeEvaNew(self, candidators, adj, edgenum=20):
    """Score candidate edges by training models on random subsets of them.

    For every round a deep copy of ``adj`` receives ``edgenum`` edges drawn
    (with replacement) from ``candidators``; a ``gemodel_GCN`` is trained on
    it and its ``performance()`` value is recorded against every edge drawn
    in that round. An edge drawn twice in one round is recorded twice.

    The number of rounds is
    ``int(2 * self.candinum * self.knn * self.edgeevaltimes / self.poolnum / edgenum)``.

    Args:
        candidators: Sequence of ``(a, b)`` node-index pairs.
        adj: Adjacency matrix supporting ``[row, col]`` assignment.
        edgenum: Number of candidate edges added per round.

    Returns:
        ``collections.defaultdict(list)`` mapping each drawn ``(a, b)`` to the
        list of scores of the rounds it took part in. Empty when
        ``candidators`` is empty. If an ordinary exception occurs, it is
        reported on stdout and whatever was collected so far is returned.
    """
    res = collections.defaultdict(list)
    try:
        lencan = len(candidators)
        if lencan == 0:
            # Nothing to sample from; avoid the confusing "empty range" error
            # that random.randint would raise below.
            return res
        modelnum = int(2 * self.candinum * self.knn * self.edgeevaltimes / self.poolnum / edgenum)
        for _ in range(modelnum):
            addededge = []
            tempadj = copy.deepcopy(adj)
            for _ in range(edgenum):
                a, b = candidators[random.randint(0, lencan - 1)]
                tempadj[a, b] = 1
                tempadj[b, a] = 1
                addededge.append((a, b))
            g = gemodel_GCN(
                tempadj, self.features, self.labels,
                split_t=(self.split_train, self.split_val, self.split_unlabeled))
            g.train()
            score = g.performance()
            for e in addededge:
                res[e].append(score)
    except Exception as err:
        # Best-effort: report and return partial results. Only Exception is
        # caught so KeyboardInterrupt / SystemExit still stop the process.
        print('raised exception edgeEvaNew: {}'.format(err))
    return res
def par(self, es, prevadj):
    """Train a model on ``prevadj`` plus the edges in ``es`` and return its score.

    Args:
        es: Iterable of ``(i, j)`` node-index pairs to set (symmetrically) to 1.
        prevadj: Adjacency matrix; it is deep-copied, not modified.

    Returns:
        The ``performance()`` value of a ``gemodel_GCN`` built with
        ``seed=self.seed`` and ``dropout=0`` and trained on the augmented copy.
    """
    augmented = copy.deepcopy(prevadj)
    for u, v in es:
        augmented[u, v] = 1
        augmented[v, u] = 1

    model = gemodel_GCN(
        augmented,
        self.features,
        self.labels,
        split_t=(self.split_train, self.split_val, self.split_unlabeled),
        seed=self.seed,
        dropout=0,
    )
    model.train()
    return model.performance()
def subseteval2(self, topset, prevadj):
    """Evaluate a random subset of ``topset`` edges added to ``prevadj``.

    ``self.edgeaddnum`` edges are drawn with replacement from ``topset`` and
    set symmetrically on a deep copy of ``prevadj``; one ``gemodel_GCN`` is
    then trained on the result.

    Args:
        topset: Indexable sequence of hashable ``(a, b)`` node-index pairs.
        prevadj: Adjacency matrix; it is deep-copied, not modified.

    Returns:
        ``(chosen, score)`` where ``chosen`` is the set of distinct edges drawn
        and ``score`` is the trained model's ``acu()`` value.
    """
    augmented = copy.deepcopy(prevadj)
    chosen = set()
    for _ in range(self.edgeaddnum):
        pick = random.randint(0, len(topset) - 1)
        chosen.add(topset[pick])
        u, v = topset[pick]
        augmented[u, v] = 1
        augmented[v, u] = 1

    model = gemodel_GCN(
        augmented,
        self.features,
        self.labels,
        split_t=self.split_t,
        seed=self.seed,
        dropout=self.dropout,
    )
    model.train()
    return (chosen, model.acu())
def getembs(self, adj):
    """Train ``self.model_num`` models on ``adj`` and collect their embeddings.

    Only ``self.default_model == 'GCN'`` is supported. ``adj`` is passed
    through ``utils.preprocess_graph`` once and the result is shared by all
    models.

    Args:
        adj: Adjacency matrix accepted by ``utils.preprocess_graph``.

    Returns:
        List with one ``getembeddings()`` result per trained model.

    Raises:
        SystemExit: With code ``-1`` when ``self.default_model`` is not
            ``'GCN'`` (an error line is printed first).
    """
    print('generate several embds')
    if self.default_model != 'GCN':
        print('ERR: wrong default GE model for add edges')
        # Equivalent to the interactive ``exit(-1)`` helper, but does not
        # depend on the ``site`` module having injected it.
        raise SystemExit(-1)

    a = utils.preprocess_graph(adj)
    embeddings = []
    for _ in range(self.model_num):
        g = gemodel_GCN(
            a, self.features, self.labels,
            split_t=(self.split_train, self.split_val, self.split_unlabeled))
        g.train()
        embeddings.append(g.getembeddings())
    return embeddings
def subseteval(self, adj, candidators):
    """Evaluate a random subset of ``candidators`` edges added to ``adj``.

    ``self.enum2add`` edges are drawn with replacement from ``candidators``
    and set symmetrically on a deep copy of ``adj``; one ``gemodel_GCN`` is
    then trained on the result with ``seed=1`` and ``dropout=0``.

    Args:
        adj: Adjacency matrix; it is deep-copied, not modified.
        candidators: Indexable sequence of hashable ``(a, b)`` node-index pairs.

    Returns:
        ``(chosen, score)`` where ``chosen`` is the set of distinct edges drawn
        and ``score`` is the trained model's ``performance()`` value.
    """
    augmented = copy.deepcopy(adj)
    chosen = set()
    for _ in range(self.enum2add):
        pick = random.randint(0, len(candidators) - 1)
        chosen.add(candidators[pick])
        u, v = candidators[pick]
        augmented[u, v] = 1
        augmented[v, u] = 1

    model = gemodel_GCN(
        augmented,
        self.features,
        self.labels,
        split_t=(self.split_train, self.split_val, self.split_unlabeled),
        seed=1,
        dropout=0,
    )
    model.train()
    score = model.performance()
    return (chosen, score)
def edgeReconstruction(self, prevAdj, embds, edgenum=20):
    """Compare re-adding deleted edges with adding random edges, then exit.

    Runs 1000 trials that each add 30 edges sampled from
    ``self.deleted_edges`` and 1000 trials that each add 30 uniformly random
    node pairs, all evaluated through ``self.par`` in a process pool. Each
    trial score is compared with the score of a model trained on ``prevAdj``
    unchanged, and two summary lines are printed (random-edge trials first).

    This is a diagnostic routine: it always terminates the process.

    Args:
        prevAdj: Adjacency matrix exposing ``shape``; passed to ``self.par``.
        embds: Unused; kept for interface compatibility.
        edgenum: Unused; kept for interface compatibility.

    Raises:
        SystemExit: Always, with code ``-1``, after the summaries are printed.
    """

    def _summarize(scores, baseline):
        """Return (better, worse, equal, best, worst) of scores vs. baseline."""
        better = worse = equal = 0
        # Sentinels kept from the original: scores are assumed to lie in [0, 100].
        best, worst = 0, 100
        for score in scores:
            best = max(best, score)
            worst = min(worst, score)
            if score > baseline:
                better += 1
            elif score < baseline:
                worse += 1
            else:
                equal += 1
        return better, worse, equal, best, worst

    trials = 1000
    edges_per_trial = 30

    pool = Pool()
    try:
        # Trials that restore previously deleted edges; items 1 and 2 of each
        # deleted-edge record are used as the two endpoints.
        deleted_jobs = []
        for _ in range(trials):
            es = []
            for _ in range(edges_per_trial):
                a = self.deleted_edges[random.randint(0, len(self.deleted_edges) - 1)]
                es.append((a[1], a[2]))
            deleted_jobs.append(pool.apply_async(self.par, args=(es, prevAdj)))

        # Control trials that add uniformly random node pairs.
        random_jobs = []
        for _ in range(trials):
            es = []
            for _ in range(edges_per_trial):
                a = random.randint(0, prevAdj.shape[0] - 1)
                b = random.randint(0, prevAdj.shape[0] - 1)
                es.append((a, b))
            random_jobs.append(pool.apply_async(self.par, args=(es, prevAdj)))

        pool.close()
        pool.join()
    finally:
        # Release worker processes even if submitting jobs failed; this is a
        # no-op once the pool has been closed and joined.
        pool.terminate()

    g = gemodel_GCN(
        prevAdj, self.features, self.labels,
        split_t=(self.split_train, self.split_val, self.split_unlabeled),
        seed=self.seed, dropout=0)
    g.train()
    initperformance = g.performance()

    # .get() re-raises any exception that occurred in a worker.
    better, worse, eq, best, worst = _summarize(
        [job.get() for job in deleted_jobs], initperformance)
    better2, worse2, eq2, best2, worst2 = _summarize(
        [job.get() for job in random_jobs], initperformance)

    print(
        'better2: {}, worse2: {}, eq2: {}, best2: {}, worst2: {}, init: {}'
        .format(better2, worse2, eq2, best2, worst2, initperformance))
    print('better: {}, worse: {}, eq: {}, best: {}, worst: {}, init: {}'.
          format(better, worse, eq, best, worst, initperformance))
    # Equivalent to the interactive ``exit(-1)`` helper without relying on ``site``.
    raise SystemExit(-1)
# Experiment: for several values of the edge-deletion parameter, disturb the
# graph with random edges, train a model on it, and collect the FastLoss
# histogram data for plotting.
split_t = Preprocess.splitdata(_N, labels, seed=123, share=share)
split_train, split_val, split_unlabeled = split_t

distnum = 1000
dst = distEdge_ran(distnum)
deletesizes = [1, 0.8, 0.5, 0.2]
fo = '{}/{}'
res = []
labelk = []

for size in deletesizes:
    adj, remained, deleted = spa.delete_edges(_A_obs, k=size)
    adj_d = dst.disturb(adj)
    print('disturb adj, add {} ran edges, prev {}, after {}'.format(
        len(deleted), adj.nnz, adj_d.nnz))

    # Edge sets of the graph before and after the disturbance.
    initadjset = edge2list.sett(adj)
    disadjset = edge2list.sett(adj_d)

    g = gemodel_GCN(adj_d, feas, labels, split_t=split_t, sGCN=True)
    g.train()
    print(g.acu())

    # Evaluate the trained first-layer weights inside the model's own session.
    W = g.model.W1.eval(session=g.model.session)
    fl = FastLoss(adj_d, feas, labels, W, split_val, deleted_e=deleted)
    bins, dens = fl.test(initset=initadjset, disset=disadjset)

    res.append((bins, dens))
    labelk.append(fo.format(distnum, adj.nnz / 2))

Hist.subplots(res, labelk)
# Other FastLoss probes tried here earlier: fl.test_deleted(), fl.test_remained(remained)
# Experiment: compare models trained on the `_A_prev` graph (plain and with
# sGCN=True) against one trained on the graph returned by spa.delete_edges,
# all using the same fixed data split.
adj, remained, deleted = spa.delete_edges(_A_obs, k=percent)
# NOTE(review): `adj` is derived from `_A_obs` while the node count comes from
# `_A_prev` — presumably both describe the same node set; confirm.
_N = _A_prev.shape[0]

# Earlier variant of the same comparison using splitdata's default seed/share,
# kept for reference:
# split_train, split_val, split_unlabeled = Preprocess.splitdata(_N, labels)
# split_t = (split_train, split_val, split_unlabeled)
# gcn = gemodel_GCN(_A_prev, feas, labels, split_t=split_t, seed=1, dropout=0)
# run_time(gcn.train)
# print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))
# gcn = gemodel_GCN(adj, feas, labels, split_t=split_t, seed=1, dropout=0)
# gcn.train()
# print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

split_train, split_val, split_unlabeled = Preprocess.splitdata(_N, labels, seed=12, share=(0.052, 0.3693))  # explicit seed and share (not the defaults)
split_t = (split_train, split_val, split_unlabeled)

# Model on `_A_prev`; training goes through run_time (presumably a timing
# wrapper — confirm against its definition).
gcn = gemodel_GCN(_A_prev, feas, labels, split_t=split_t, seed=1, dropout=0)
run_time(gcn.train)
print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

# Same graph and settings with sGCN=True.
gcn = gemodel_GCN(_A_prev, feas, labels, split_t=split_t, seed=1, dropout=0, sGCN=True)
run_time(gcn.train)
print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))

# Model on the graph returned by delete_edges (trained directly, not via run_time).
gcn = gemodel_GCN(adj, feas, labels, split_t=split_t, seed=1, dropout=0)
gcn.train()
print('performance: {}, acu: {}'.format(gcn.performance(), gcn.acu()))
# print('preprocess, delete some edges, remaind edges num(bi): {}'.format(adj.nnz))
# savefile = 'data/coradele.npz'