def compute_nystrom(ds_name, use_node_labels, embedding_dim, community_detection_method, kernels):
    if ds_name == "SYNTHETIC":
        graphs, labels = generate_synthetic()
    else:
        graphs, labels = load_data(ds_name, use_node_labels)

    print('computing communities ...')
    communities, subgraphs = compute_communities(graphs, use_node_labels, community_detection_method)

    print("Number of communities: ", len(communities))
    lens = []
    for community in communities:
        lens.append(community.number_of_nodes())
    print("Average size: %.2f" % np.mean(lens))

    # One Nystrom embedding matrix per kernel.
    Q = []
    for idx, k in enumerate(kernels):
        model = Nystrom(k, n_components=embedding_dim)
        model.fit(communities)
        Q_t = model.transform(communities)
        # Prepend a zero row so that index 0 can act as a padding embedding.
        Q_t = np.vstack([np.zeros(embedding_dim), Q_t])
        Q.append(Q_t)

    return Q, subgraphs, labels, Q_t.shape
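# Usage sketch for compute_nystrom above. The dataset name, community
# detection method, and kernel callables are illustrative placeholders, and
# load_data / compute_communities / Nystrom are assumed importable from this
# repo; none of these values are prescribed by the function itself.
Q, subgraphs, labels, shape = compute_nystrom(
    ds_name="MUTAG",
    use_node_labels=True,
    embedding_dim=128,
    community_detection_method="louvain",
    kernels=[sp_kernel, wl_kernel],  # kernel functions defined elsewhere
)
# One (n_communities + 1) x embedding_dim matrix per kernel; row 0 is the
# zero-padding row stacked in by compute_nystrom.
for Q_t in Q:
    print(Q_t.shape)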
def fit(self, X, y, batch_size=100):
    '''
    Arguments:
        X: (array) The training data; must have two dimensions.
        y: (array) The training labels; must be one-hot encoded with two
            dimensions. To produce training labels in this format, one might
            use sklearn.preprocessing.LabelBinarizer.
        batch_size: (int) The mini-batch size for computing the stochastic
            gradients.
    '''
    # Learn the Nystrom feature map, then fit a linear classifier on the
    # mapped features.
    mapper = Nystrom(kernel=self.kernel, gamma=self.gamma, degree=self.degree,
                     coef0=self.coef0, n=self.n, k=self.k, rand_svd=self.rand_svd)
    mapper.fit(X)
    self._Xrep = mapper._Xrep

    clf = TFClassifier(loss=self.loss, alpha=self.alpha, optimizer=self.optimizer)
    clf.fit(mapper.transform(X), y, batch_size=batch_size)

    # Pull the linear weights back through the Nystrom map to recover dual
    # coefficients over the landmark points.
    self.dual_coef_ = np.dot(mapper.A, clf.coef_.T).T
    self.intercept_ = clf.intercept_
    return
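# Sketch of how the recovered dual coefficients could be used at prediction
# time, assuming a decision function of the form
# K(X, landmarks) @ dual_coef_.T + intercept_. This method is not part of the
# class above, and kernel_fn is a hypothetical stand-in for the same kernel
# the Nystrom mapper was fitted with.
def decision_function(self, X):
    # K has shape (n_test, n_landmarks); self._Xrep holds the Nystrom landmarks.
    K = kernel_fn(X, self._Xrep)
    return np.dot(K, self.dual_coef_.T) + self.intercept_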
def compute_nystrom(use_node_labels, embedding_dim, community_detection_method, kernels):
    graphs_reg, labels_reg, graphs_gen, labels_gen, graphs_mal, labels_mal = load()
    graphs = graphs_reg + graphs_gen + graphs_mal
    labels = np.concatenate((labels_reg, labels_gen, labels_mal), axis=0)

    communities, subgraphs = compute_communities(graphs, use_node_labels, community_detection_method)

    print("Number of communities: ", len(communities))
    lens = []
    for community in communities:
        lens.append(community.number_of_nodes())
    print("Average size: %.2f" % np.mean(lens))

    Q = []
    for idx, k in enumerate(kernels):
        model = Nystrom(k, n_components=embedding_dim)
        model.fit(communities)
        Q_t = model.transform(communities)
        Q_t = np.vstack([np.zeros(embedding_dim), Q_t])
        Q.append(Q_t)

    return Q, subgraphs, labels, Q_t.shape
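# Usage sketch for this variant: no dataset name is taken because load()
# itself returns the three graph sets that get merged above. The method name
# and kernel callable are placeholders assumed to be defined elsewhere in
# the repo.
Q, subgraphs, labels, shape = compute_nystrom(
    use_node_labels=False,
    embedding_dim=100,
    community_detection_method="louvain",
    kernels=[sp_kernel],
)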
def compute_nystrom(ds_name, pct_data, use_node_labels, embedding_dim, community_detection_method, kernels, seed):
    communities_load_path = "communities_dump_" + ds_name + "_balance_42.pkl"
    nystrom_load_path = "nystrom_dump_" + ds_name + "_balance_42.pkl"

    # Return cached Nystrom results if they exist.
    if os.path.exists(nystrom_load_path):
        print('loading Nystrom results from ', nystrom_load_path)
        return pkl.load(open(nystrom_load_path, 'rb'))

    # Otherwise, reuse cached communities or compute them from scratch.
    if os.path.exists(communities_load_path):
        print("loading preprocessed communities data from", communities_load_path)
        communities, subgraphs, labels = pkl.load(open(communities_load_path, 'rb'))
    else:
        if ds_name == "SYNTHETIC":
            graphs, labels = generate_synthetic()
        else:
            graphs, labels = load_data(dataset=ds_name, pct_data=pct_data, seed=seed)
        communities, subgraphs = compute_communities(graphs, use_node_labels, community_detection_method)
        print("Number of communities: ", len(communities))
        print("dumping communities to", communities_load_path)
        # Labels are stored alongside the communities so that the cached
        # branch above can still return them.
        pkl.dump((communities, subgraphs, labels), open(communities_load_path, 'wb'))

    lens = []
    for community in communities:
        lens.append(community.number_of_nodes())
    print("Average size: %.2f" % np.mean(lens))
    sys.stdout.flush()

    Q = []
    for idx, k in enumerate(kernels):
        model = Nystrom(k, n_components=embedding_dim)
        model.fit(communities)
        Q_t = model.transform(communities)
        Q_t = np.vstack([np.zeros(embedding_dim), Q_t])
        Q.append(Q_t)

    print("Dumping Nystrom output to", nystrom_load_path)
    pkl.dump((Q, subgraphs, labels, Q_t.shape), open(nystrom_load_path, 'wb'))
    return Q, subgraphs, labels, Q_t.shape
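# The two pickle caches above make reruns cheap but go stale whenever the
# dataset, split, or community detection method changes. A hypothetical
# helper (not in the original code) to force recomputation:
def clear_nystrom_cache(ds_name):
    for path in ("communities_dump_" + ds_name + "_balance_42.pkl",
                 "nystrom_dump_" + ds_name + "_balance_42.pkl"):
        if os.path.exists(path):
            os.remove(path)
            print("removed", path)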
lr_t = learning_rate
min_avg_loss = np.inf
avg_obj = 0.
for t in range(1, n_epochs + 1):
    for batch_num in range(1, len(Xtr) // batch_size + 1):
        Xb, yb = get_next_batch(batch_size)
        di_obj, _ = sess.run([objective, train_step],
                             feed_dict={x: Xb, y: yb, lr: lr_t})
        # Running average of the objective over the epoch.
        avg_obj = ((batch_num - 1) * avg_obj + di_obj) / batch_num
    print('Epoch %d with average KDI %f' % (t, avg_obj))
    # Decay the learning rate by 10x every lr_epochs epochs.
    if t % lr_epochs == 0:
        lr_t = lr_t * 0.1

print('Fitting a predictor on the optimized Nystrom mapping.')
kmap = Nystrom(kernel='rbf', gamma=gamma, n=n)
kmap.fit(sess.run(kernel_func.X_rep))
sess.close()

clf = TFClassifier(loss='logistic', alpha=0.)
clf.fit(kmap.transform(Xtr), ytr)
accuracy = clf.score(kmap.transform(Xte), yte)
print('Accuracy for feature dimensionality %d: %f' % (n, accuracy))
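# get_next_batch is referenced above but not defined in this fragment. A
# minimal NumPy implementation consistent with how it is called, assuming
# Xtr and ytr are arrays in the enclosing scope, might look like:
def get_next_batch(batch_size):
    # Sample a random mini-batch without replacement within the batch.
    batch_idx = np.random.choice(len(Xtr), size=batch_size, replace=False)
    return Xtr[batch_idx], ytr[batch_idx]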
def mpgk_aa(Gs, h, n_clusters, limit):
    N = len(Gs)
    # use_node_labels is referenced but not passed in; it is assumed to be a
    # module-level flag selecting between discrete 'label' and continuous
    # 'attributes' node features. (G.node is the NetworkX 1.x accessor.)
    if use_node_labels:
        d = Gs[0].node[list(Gs[0].nodes())[0]]['label'].size
    else:
        d = Gs[0].node[list(Gs[0].nodes())[0]]['attributes'].size

    # Flatten all graphs into one node-feature matrix: idx[i] marks where
    # graph i's nodes start, and nbrs maps a global node index to the global
    # indices of its neighbors.
    idx = np.zeros(N + 1, dtype=np.int64)
    nbrs = dict()
    ndata = []
    for i in range(N):
        n = Gs[i].number_of_nodes()
        idx[i + 1] = idx[i] + n
        nodes = list(Gs[i].nodes())
        M = np.zeros((n, d))
        nodes2idx = dict()
        for j in range(idx[i], idx[i + 1]):
            if use_node_labels:
                M[j - idx[i], :] = Gs[i].node[nodes[j - idx[i]]]['label']
            else:
                M[j - idx[i], :] = Gs[i].node[nodes[j - idx[i]]]['attributes']
            nodes2idx[nodes[j - idx[i]]] = j
        ndata.append(M)
        for node in nodes:
            nbrs[nodes2idx[node]] = list()
            for neighbor in Gs[i].neighbors(node):
                nbrs[nodes2idx[node]].append(nodes2idx[neighbor])

    graph_hists = list()
    X = np.vstack(ndata)
    for it in range(1, h + 1):
        print("Iteration:", it)
        hists, nbrs_hists = compute_histograms(X, nbrs, n_clusters, limit)
        # Re-embed the node and neighborhood histograms with Nystrom and
        # concatenate them into the next iteration's node features.
        X = np.zeros((X.shape[0], 200))
        ny = Nystrom(n_components=150)
        ny.fit(hists)
        X[:, :150] = ny.transform(hists)
        ny = Nystrom(n_components=50)
        ny.fit(nbrs_hists)
        X[:, 150:] = ny.transform(nbrs_hists)

        # Aggregate the node histograms of each graph.
        graph_hists.append(list())
        for i in range(N):
            counts = dict()
            for j in range(idx[i], idx[i + 1]):
                for key in hists[j]:
                    if key in counts:
                        counts[key] += hists[j][key]
                    else:
                        counts[key] = hists[j][key]
            graph_hists[it - 1].append(counts)

    # Histogram-intersection kernel between graphs, summed over iterations.
    K = np.zeros((N, N))
    for it in range(h):
        for i in range(N):
            for j in range(i, N):
                for key in graph_hists[it][i]:
                    if key in graph_hists[it][j]:
                        K[i, j] += min(graph_hists[it][i][key], graph_hists[it][j][key])
                K[j, i] = K[i, j]
    return K
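# Typical downstream use of the kernel matrix returned by mpgk_aa: a
# precomputed-kernel SVM evaluated by cross-validation. The hyperparameters
# are illustrative, and y is assumed to hold the graph labels.
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

K = mpgk_aa(Gs, h=3, n_clusters=20, limit=5)
clf = SVC(kernel='precomputed', C=1.0)
scores = cross_val_score(clf, K, y, cv=10)
print("10-fold accuracy: %.4f" % scores.mean())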