import numpy as np
from netlsd import heat


def calc_heat(G=None, graph_dict=None, start=-2, end=2, normalization="empty"):
    assert G is not None or graph_dict is not None, \
        "Need to supply a graph, or graphs, via the parameters G or graph_dict"
    # Create a set of 250 timescales from 10**start to 10**end on a logarithmic scale
    times = np.logspace(start, end, 250)
    heat_dict = {"t": times}
    if G is not None:
        heat_dict["graph"] = heat(G, timescales=times, normalization=normalization)
    else:
        for label, graph in graph_dict.items():
            if type(graph) is list:
                print("--- calculating heat traces for {} graphs of type {} ---".format(len(graph), label))
                heat_list = [heat(g, timescales=times, normalization=normalization) for g in graph]
                heat_dict[label] = heat_list
            else:
                print("--- calculating {} eigenvalues (n={})".format(label, len(graph)))
                heats = heat(graph, timescales=times, normalization=normalization)
                heat_dict[label] = heats
    return heat_dict
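# A minimal usage sketch; the generators and labels below are illustrative only and
# are not part of the original function.
import networkx as nx

example_graphs = {"ER": [nx.erdos_renyi_graph(50, 0.1) for _ in range(3)],
                  "path": nx.path_graph(50)}
example_heat = calc_heat(graph_dict=example_graphs)  # example_heat["t"] holds the 250 timescales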
def main():
    parser = argparse.ArgumentParser(
        description='Experiment for graph classification with coarse graphs')
    parser.add_argument('--dataset', type=str, default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument('--method', type=str, default="mgc",
                        help='name of the coarsening method')
    parser.add_argument('--ratio', type=float, default=0.2,
                        help='the ratio between coarse and original graphs n/N')
    args = parser.parse_args()

    if args.dataset not in ["MUTAG", "ENZYMES", "NCI1", "NCI109", "PROTEINS", "PTC"]:
        print("Incorrect input dataset")
        sys.exit()
    if args.method not in ['mgc', 'sgc']:
        print("Incorrect input coarsening method")
        sys.exit()
    if args.ratio < 0 or args.ratio > 1:
        print("Incorrect input ratio")
        sys.exit()

    dir = 'dataset'
    am, labels = parse.parse_dataset(dir, args.dataset)
    num_samples = len(am)
    X = np.zeros((num_samples, 250))  # one 250-dim NetLSD heat trace per graph
    Y = labels
    for i in range(num_samples):
        N = am[i].shape[0]
        n = int(np.ceil(args.ratio * N))  # target size of the coarse graph
        if args.method == "mgc":
            coarse_method = coarsening.multilevel_graph_coarsening
        else:
            coarse_method = coarsening.spectral_graph_coarsening
        if n > 1:
            Gc, Q, idx = coarse_method(am[i], n)
        else:
            Gc, Q, idx = coarse_method(am[i], 1)
        G = nx.from_numpy_matrix(Gc)
        X[i] = netlsd.heat(G)  # NetLSD signature of the coarsened graph
    acc, std = classification.KNN_classifier_kfold(X, Y, 10)
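# Example invocation (the script filename is an assumption; the flags match the parser above):
#   python main.py --dataset MUTAG --method sgc --ratio 0.3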
def get_wne(dataset_name, sampled_dir='', cache=True):
    dataset_filename = os.path.abspath(
        os.path.join('data/{}'.format(dataset_name), sampled_dir, 'graph.edgelist'))
    labels = os.path.abspath(
        os.path.join(os.path.dirname(dataset_filename), 'label.txt'))
    save_path = os.path.abspath(
        os.path.join('embeddings/{}'.format(dataset_name), sampled_dir, 'wme.embeddings'))
    # Recompute the embedding if caching is disabled, the cache file is missing,
    # or the cache is older than the edge list.
    if (not cache) or (not os.path.exists(save_path)) or (
            os.path.getmtime(save_path) < os.path.getmtime(dataset_filename)):
        G = utils.load_graph(dataset_filename, label_name=None)
        do_full = (G.number_of_nodes() < 10000)
        eigenvalues = 'full' if do_full else 'auto'
        wne = netlsd.heat(G, timescales=np.logspace(-2, 2, 10), eigenvalues=eigenvalues)
        with utils.write_with_create(save_path) as f:
            print(" ".join(map(str, wne)), file=f)
    return np.loadtxt(save_path)
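# Hypothetical call: assumes data/<dataset_name>/graph.edgelist exists on disk and that
# utils.load_graph / utils.write_with_create are available; the dataset name is made up.
# emb = get_wne('cora')  # 10-dimensional heat trace, cached under embeddings/cora/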
def __extract(self, nx_g: networkx.Graph) -> torch.Tensor:
    # Flatten the NetLSD heat descriptor into a 1-D feature tensor.
    return torch.tensor(netlsd.heat(nx_g, *self.__args, **self.__kwargs)).view(-1)
import netlsd


def graph_properties(g):
    # NetLSD heat trace with default settings: 250 logarithmically spaced timescales.
    return netlsd.heat(g)
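# Example call on an illustrative graph (not part of the original snippet):
import networkx as nx

sig = graph_properties(nx.karate_club_graph())  # numpy array of length 250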
import netlsd
import networkx as nx

# g = nx.erdos_renyi_graph(100, 0.01)  # create a random graph with 100 nodes
g = nx.Graph()
g.add_nodes_from([1, 2, 3])
g.add_edges_from([(1, 2), (1, 3)])
g.add_node("spam")        # adds the single node "spam"
g.add_nodes_from("spam")  # adds four nodes: 's', 'p', 'a', 'm'
g.add_edge(3, 'm')

descriptor = netlsd.heat(g)  # compute the signature
print("end")
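# A possible follow-up (not in the original snippet): netlsd.compare returns the
# Euclidean distance between two heat-trace signatures.
h = nx.erdos_renyi_graph(100, 0.05)
print(netlsd.compare(descriptor, netlsd.heat(h)))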
def classify(dataset, queries, dataset_cls, target, args, dataset0=None, queries0=None):
    """
    classification tasks using various methods.
    dataset0, queries0 are original, non-sketched graphs.
    dataset, queries contain sketched graphs.
    """
    if dataset0 is None:
        dataset0 = dataset
        queries0 = queries
    #with open(args.graph_fname, 'rb') as f:
    #    graphs = pickle.read(f)
    n_data = len(dataset)
    n_queries = len(queries)
    ot_cost = np.zeros((len(queries), len(dataset)))
    netlsd_cost = np.zeros((len(queries), len(dataset)))
    Ly_mx = []
    Lx_mx = []
    data_graphs = []
    heat_l = []
    #avg_deg = 0
    for i, data in enumerate(dataset):
        #pdb.set_trace()
        if isinstance(data, torch.Tensor):
            L = data
        else:
            n_nodes = len(data.nodes())
            L = utils.graph_to_lap(data)
        avg_deg = (L.diag().mean())
        L /= avg_deg
        #Ly_mx.append(L[torch.triu(torch.ones(n_nodes, n_nodes), diagonal=1) > 0])
        Ly_mx.append(L)
        #pdb.set_trace()
        heat_l.append(netlsd.heat(L.numpy()))
    #avg_deg /= len(dataset)
    for i, q in enumerate(tqdm(queries, desc='queries')):
        '''###
        if isinstance(data, torch.Tensor):
            L = data
        else:
            n_nodes = len(data.nodes())
            L = utils.graph_to_lap(data)
        '''
        Lx = utils.graph_to_lap(q)
        avg_deg = (Lx.diag().mean())
        Lx /= avg_deg
        args.Lx = Lx
        args.m = len(q.nodes())
        q_heat = netlsd.heat(Lx.numpy())
        Lx_mx.append(args.Lx)
        for j, data in enumerate(dataset):
            Ly = Ly_mx[j].clone()
            args.n = len(Ly)
            min_loss = 10000
            for _ in range(1):
                loss, P, Ly_ = graph.graph_dist(args, plot=False, Ly=Ly, take_ly_exp=False)
                #pdb.set_trace()
                if loss < min_loss:
                    min_loss = loss
            ot_cost[i][j] = min_loss
            netlsd_cost[i][j] = netlsd.compare(q_heat, heat_l[j])
    if args.dataset_type == 'real':
        ot_cost1 = (ot_cost - ot_cost.mean()) / np.std(ot_cost)
        ot_pred = ot_cost.argmin(1)
        ot_acc00 = np.equal(dataset_cls[ot_pred], target).sum() / len(target)
        print('OT ACC |{} '.format(ot_acc00))
        ot_sorted = np.argsort(ot_cost, axis=-1)
        #pdb.set_trace()
        ot_cls = dataset_cls[ot_sorted[:, :3]].tolist()
        combine_pred = np.zeros(len(target))
        for i, ot_c in enumerate(ot_cls):
            counter = collections.Counter()
            counter.update(ot_c)
            #pdb.set_trace()
            common = counter.most_common(1)[0][0]
            combine_pred[i] = common
        combine_acc = np.equal(combine_pred, target).sum() / len(target)
        #pdb.set_trace()
        ###
        ot_pred = ot_cost.argmin(1)
        ot_acc = np.equal(dataset_cls[ot_pred], target).sum() / len(target)
        netlsd_pred = netlsd_cost.argmin(1)
        netlsd_acc = np.equal(dataset_cls[netlsd_pred], target).sum() / len(target)
        print('OT ACC |{} '.format(ot_acc))
        return ot_acc00, netlsd_acc
    ot_cost_ = torch.from_numpy(ot_cost)
    #for combined, can add dist here
    ot_cost_ranks = torch.argsort(ot_cost_, -1)[:, :args.n_per_cls]
    ones = torch.ones(args.n_per_cls * 3)  #args.n_per_cls*2 (n_cls*2) 100
    ot_cls = np.ones(n_queries)
    combine_cls = np.ones(n_queries)
    dataset_cls_t = torch.from_numpy(dataset_cls)
    #pdb.set_trace()
    for i in range(n_queries):
        #for each cls
        cur_ranks_ot = dataset_cls_t[ot_cost_ranks[i]]
        ranked = torch.zeros(100)  #n_cls*2
        ranked.scatter_add_(src=ones, index=cur_ranks_ot, dim=-1)
        ot_cls[i] = torch.argmax(ranked).item()
    ot_cost_means = np.mean(ot_cost.reshape(n_queries, n_data // args.n_per_cls, args.n_per_cls), axis=-1)
    ot_idx = np.argmin(ot_cost_means, axis=-1) * args.n_per_cls
    print('ot_cost mx ', ot_cost)
    ot_cls1 = dataset_cls[ot_idx]
    ot_acc, ot_acc1 = np.equal(ot_cls, target).sum() / len(target), np.equal(ot_cls1, target).sum() / len(target)
    print('ot acc1 {} ot acc {} '.format(ot_acc1, ot_acc))
def _transform(self, data):
    # Append the NetLSD heat descriptor to the graph-level feature vector data.gf.
    dsc = torch.FloatTensor(
        [netlsd.heat(data.G, *self._args, **self._kwargs)])
    data.gf = torch.cat([data.gf, dsc], dim=1)
    return data
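# A standalone sketch of what _transform does; SimpleNamespace stands in for the
# dataset's data object (an assumption), and netlsd.heat's defaults are used.
import torch
import netlsd
import networkx as nx
from types import SimpleNamespace

data = SimpleNamespace(G=nx.karate_club_graph(), gf=torch.zeros(1, 0))
dsc = torch.FloatTensor([netlsd.heat(data.G)])  # shape (1, 250) with default timescales
data.gf = torch.cat([data.gf, dsc], dim=1)      # graph features now include the descriptor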