import argparse
import sys

import networkx as nx
import numpy as np
import netlsd
from netlsd import heat

# classification, coarsening, and parse are project-local modules (assumed available).
import classification
import coarsening
import parse


def calc_heat(G=None, graph_dict=None, start=-2, end=2, normalization="empty"):
    assert G is not None or graph_dict is not None, "Need to supply a graph, or graphs, via the parameters G or graph_dict"
    # Create a set of times from 10**start to 10**end (by default 10**-2 to 10**2) on a logarithmic scale
    times = np.logspace(start,end,250)
    heat_dict = {"t":times}
    if G is not None:
        heat_dict["graph"] = heat(G,timescales=times,normalization=normalization)
    else:
        for label, graph in graph_dict.items():
            if isinstance(graph, list):
                print("--- calculating heat traces for {} graphs of type {} ---".format(len(graph),label))
                heat_list = [heat(g,timescales=times, normalization=normalization) for g in graph]
                heat_dict[label] = heat_list
            else:
                print("--- calculating {} eigenvalues (n={})".format(label,len(graph)))
                heats = heat(graph,timescales=times,normalization=normalization)
                heat_dict[label] = heats            
    return heat_dict
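
A minimal usage sketch for calc_heat (it relies on the imports above; the graph labels and sizes are illustrative):

graphs = {
    "erdos_renyi": [nx.erdos_renyi_graph(50, 0.1) for _ in range(5)],  # a list of graphs
    "barbell": nx.barbell_graph(10, 5),                                # a single graph
}
result = calc_heat(graph_dict=graphs)
print(result["t"].shape)           # (250,) timescales
print(len(result["erdos_renyi"]))  # 5 heat traces, one per graph
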
def main():
    parser = argparse.ArgumentParser(
        description='Experiment for graph classification with coarse graphs')
    parser.add_argument('--dataset',
                        type=str,
                        default="MUTAG",
                        help='name of dataset (default: MUTAG)')
    parser.add_argument('--method',
                        type=str,
                        default="mgc",
                        help='name of the coarsening method')
    parser.add_argument(
        '--ratio',
        type=float,
        default=0.2,
        help='the ratio between coarse and original graphs n/N')
    args = parser.parse_args()
    if args.dataset not in [
            "MUTAG", "ENZYMES", "NCI1", "NCI109", "PROTEINS", "PTC"
    ]:
        print("Unsupported dataset: {}".format(args.dataset))
        sys.exit()
    if args.method not in ['mgc', 'sgc']:
        print("Unsupported coarsening method: {}".format(args.method))
        sys.exit()
    if args.ratio < 0 or args.ratio > 1:
        print("Ratio must be in [0, 1]")
        sys.exit()
    data_dir = 'dataset'  # avoid shadowing the built-in dir()
    am, labels = parse.parse_dataset(data_dir, args.dataset)
    num_samples = len(am)
    X = np.zeros((num_samples, 250))
    Y = labels
    if args.method == "mgc":
        coarse_method = coarsening.multilevel_graph_coarsening
    else:
        coarse_method = coarsening.spectral_graph_coarsening
    for i in range(num_samples):
        N = am[i].shape[0]
        # Coarsen to n = ceil(ratio * N) nodes, but never below 1.
        n = max(int(np.ceil(args.ratio * N)), 1)
        Gc, Q, idx = coarse_method(am[i], n)
        G = nx.from_numpy_array(Gc)  # from_numpy_matrix was removed in NetworkX 3.0
        X[i] = netlsd.heat(G)
    acc, std = classification.KNN_classifier_kfold(X, Y, 10)
    print("10-fold kNN accuracy: {:.4f} +/- {:.4f}".format(acc, std))


if __name__ == '__main__':
    main()
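
With the project files in place, the script would be invoked as, e.g., python main.py --dataset MUTAG --method mgc --ratio 0.2 (main.py is a hypothetical filename; the flags are the ones defined above).
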
Example #3
import os

import numpy as np
import netlsd

# utils is a project-local module (assumed available).
import utils


def get_wne(dataset_name, sampled_dir='', cache=True):
    dataset_filename = os.path.abspath(
        os.path.join('data/{}'.format(dataset_name), sampled_dir,
                     'graph.edgelist'))
    labels = os.path.abspath(
        os.path.join(os.path.dirname(dataset_filename), 'label.txt'))
    save_path = os.path.abspath(
        os.path.join('embeddings/{}'.format(dataset_name), sampled_dir,
                     'wme.embeddings'))
    if (not cache) or (not os.path.exists(save_path)) or (
            os.path.getmtime(save_path) < os.path.getmtime(dataset_filename)):
        G = utils.load_graph(dataset_filename, label_name=None)
        do_full = (G.number_of_nodes() < 10000)
        eigenvalues = 'full' if do_full else 'auto'
        wne = netlsd.heat(G,
                          timescales=np.logspace(-2, 2, 10),
                          eigenvalues=eigenvalues)
        with utils.write_with_create(save_path) as f:
            print(" ".join(map(str, wne)), file=f)
    return np.loadtxt(save_path)
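
A hedged usage sketch ('cora' is a hypothetical dataset name; it assumes data/cora/graph.edgelist exists and caches the result under embeddings/cora):

wne = get_wne('cora')
print(wne.shape)  # (10,), one value per timescale in np.logspace(-2, 2, 10)
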
Example #4
 def __extract(self, nx_g: networkx.Graph) -> torch.Tensor:
     # Compute the NetLSD heat signature and return it as a flat 1-D tensor.
     return torch.tensor(netlsd.heat(nx_g, *self.__args,
                                     **self.__kwargs)).view(-1)
Example #5
 def graph_properties(g):
     # NetLSD heat signature with defaults: 250 logarithmically spaced timescales.
     return netlsd.heat(g)
Example #6
import netlsd
import networkx as nx

# g = nx.erdos_renyi_graph(100, 0.01) # create a random graph with 100 nodes
g = nx.Graph()
g.add_nodes_from([1, 2, 3])
g.add_edges_from([(1, 2), (1, 3)])
g.add_node("spam")        # adds one node called "spam"
g.add_nodes_from("spam")  # adds four nodes: 's', 'p', 'a', 'm'
g.add_edge(3, 'm')

descriptor = netlsd.heat(g) # compute the signature

print("end")
Example #7
import collections

import numpy as np
import torch
from tqdm import tqdm

import netlsd

# graph and utils are project-local modules (assumed available).
import graph
import utils


def classify(dataset,
             queries,
             dataset_cls,
             target,
             args,
             dataset0=None,
             queries0=None):
    """
    Classification tasks using various methods.
    dataset0 and queries0 hold the original, non-sketched graphs; dataset and queries hold the sketched graphs.
    """
    if dataset0 is None:
        dataset0 = dataset
        queries0 = queries
    n_data = len(dataset)
    n_queries = len(queries)
    ot_cost = np.zeros((len(queries), len(dataset)))

    netlsd_cost = np.zeros((len(queries), len(dataset)))

    Ly_mx = []
    Lx_mx = []
    data_graphs = []
    heat_l = []
    for data in dataset:
        if isinstance(data, torch.Tensor):
            L = data
        else:
            L = utils.graph_to_lap(data)

        # Normalize each Laplacian by its average degree before comparison.
        avg_deg = L.diag().mean()
        L /= avg_deg
        Ly_mx.append(L)

        heat_l.append(netlsd.heat(L.numpy()))

    for i, q in enumerate(tqdm(queries, desc='queries')):
        Lx = utils.graph_to_lap(q)
        avg_deg = Lx.diag().mean()
        Lx /= avg_deg

        args.Lx = Lx

        args.m = len(q.nodes())
        q_heat = netlsd.heat(Lx.numpy())
        Lx_mx.append(args.Lx)

        for j, data in enumerate(dataset):

            Ly = Ly_mx[j].clone()
            args.n = len(Ly)
            min_loss = float('inf')

            # Single run; widen the range for multiple random restarts.
            for _ in range(1):
                loss, P, Ly_ = graph.graph_dist(args,
                                                plot=False,
                                                Ly=Ly,
                                                take_ly_exp=False)
                if loss < min_loss:
                    min_loss = loss

            ot_cost[i][j] = min_loss
            netlsd_cost[i][j] = netlsd.compare(q_heat, heat_l[j])

    if args.dataset_type == 'real':
        ot_cost1 = (ot_cost - ot_cost.mean()) / np.std(ot_cost)  # normalized costs (currently unused)
        ot_pred = ot_cost.argmin(1)
        ot_acc00 = np.equal(dataset_cls[ot_pred], target).sum() / len(target)

        print('OT ACC |{} '.format(ot_acc00))

        ot_sorted = np.argsort(ot_cost, axis=-1)

        # Majority vote over the 3 nearest neighbors by OT cost.
        ot_cls = dataset_cls[ot_sorted[:, :3]].tolist()

        combine_pred = np.zeros(len(target))
        for i, ot_c in enumerate(ot_cls):
            counter = collections.Counter()
            counter.update(ot_c)
            common = counter.most_common(1)[0][0]
            combine_pred[i] = common

        combine_acc = np.equal(combine_pred, target).sum() / len(target)
        ot_pred = ot_cost.argmin(1)
        ot_acc = np.equal(dataset_cls[ot_pred], target).sum() / len(target)

        netlsd_pred = netlsd_cost.argmin(1)
        netlsd_acc = np.equal(dataset_cls[netlsd_pred],
                              target).sum() / len(target)
        print('OT ACC |{} '.format(ot_acc))
        return ot_acc00, netlsd_acc

    ot_cost_ = torch.from_numpy(ot_cost)
    ot_cost_ranks = torch.argsort(ot_cost_, -1)[:, :args.n_per_cls]
    ones = torch.ones(args.n_per_cls * 3)
    ot_cls = np.ones(n_queries)

    combine_cls = np.ones(n_queries)
    dataset_cls_t = torch.from_numpy(dataset_cls)
    for i in range(n_queries):
        cur_ranks_ot = dataset_cls_t[ot_cost_ranks[i]]
        ranked = torch.zeros(100)  # vote buckets; assumes fewer than 100 classes
        ranked.scatter_add_(src=ones, index=cur_ranks_ot, dim=-1)
        ot_cls[i] = torch.argmax(ranked).item()

    ot_cost_means = np.mean(ot_cost.reshape(n_queries,
                                            n_data // args.n_per_cls,
                                            args.n_per_cls),
                            axis=-1)
    ot_idx = np.argmin(ot_cost_means, axis=-1) * args.n_per_cls

    print('ot_cost matrix ', ot_cost)
    ot_cls1 = dataset_cls[ot_idx]
    ot_acc, ot_acc1 = np.equal(ot_cls, target).sum() / len(target), np.equal(
        ot_cls1, target).sum() / len(target)
    print('ot acc1 {} ot acc {} '.format(ot_acc1, ot_acc))
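
The NetLSD part of this pipeline reduces to nearest-neighbor search over heat signatures. A standalone sketch under that reading (the toy graphs and labels are illustrative):

import netlsd
import networkx as nx
import numpy as np

# Toy dataset: two graph families with known labels.
dataset = [nx.erdos_renyi_graph(30, 0.1, seed=s) for s in range(4)] + \
          [nx.barbell_graph(15, 2) for _ in range(4)]
dataset_cls = np.array([0] * 4 + [1] * 4)
query = nx.barbell_graph(14, 3)

sigs = [netlsd.heat(g) for g in dataset]
q_sig = netlsd.heat(query)
dists = np.array([netlsd.compare(q_sig, s) for s in sigs])
print('predicted class:', dataset_cls[dists.argmin()])  # 1-NN by NetLSD distance
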
Example #8
 def _transform(self, data):
     # Append the NetLSD heat signature to the graph-level feature vector.
     dsc = torch.FloatTensor(
         [netlsd.heat(data.G, *self._args, **self._kwargs)])
     data.gf = torch.cat([data.gf, dsc], dim=1)
     return data