Example 1
def load_pdata(dataset_str):
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in xrange(len(names)):
        objects.append(pkl.load(open("./data/ind.{}.{}".format(dataset_str, names[i]))))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("./data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)
    if dataset_str == 'citeseer':
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    
    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))

    train_mask = sample_mask(idx_train, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    train_out = []
    for i in idx_train:
        ll = y_train[i].tolist()
        ll = ll.index(1) + 1
        train_out.append([i, ll])
    train_out = np.array(train_out)
    np.random.shuffle(train_out)

    test_out = []
    for i in idx_test:
        ll = y_test[i].tolist()
        ll = ll.index(1) + 1
        test_out.append([i, ll])
    test_out = np.array(test_out)
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_mask = int(np.floor(edges.shape[0] / 10.))

    return graph, features, train_out, test_out
Example 2
def skeleton_to_nx_graph(skeleton):
  """Converts a binary skeleton image to a networkx graph
  
  Arguments:
    skeleton (array): 2d/3d binary skeleton image
    
  Returns:
    networkx.Graph: graph of the skeleton, with one node per skeleton voxel and edges between neighbouring voxels
  """
  
  ids,nh = skeleton_to_list(skeleton, with_neighborhoods = True);
  print('ids done...'); 
 
  if len(ids) == 0:
     return nx.Graph();
  elif len(ids) == 1:
    adj = {};
    adj[tuple(ids[0])] = [];
    return nx.from_dict_of_lists(adj);
  else:
    g = nx.Graph();
    for i,pos in enumerate(ids):
      if i % 500 == 0:
          print('%d/%d nodes constructed...' % (i, len(ids)));
      p = tuple(pos);
      g.add_node(p);
      posnh = np.where(nh[i]);
      for pp in np.transpose(posnh):
          g.add_edge(p, tuple(pp+pos-1));
    return g;
Example 3
def draw_graph(graphDic, nodesStatus, imageName):
    node_colors = [] 
    #first writing the number of nodes 
    #nx.draw(G) 
    #select the color 
    newGraphDic = {} #without the status 
    for element in graphDic.keys():
        status = nodesStatus[element[0] - 1]
        if status == "INACTIVE":
            node_colors +=['white']
        if status == "ACTIVE":
            node_colors +=['red']
        if status == "SELECTED":
            node_colors +=['green']
    #generating the graph from the dictionary 
    G = nx.from_dict_of_lists(graphDic) 
    nx.draw_circular(G, node_color = node_colors, with_labels=True, node_size = 50)
    #G.text(3, 8, 'boxed italics text in data coords', style='italic', bbox={'facecolor':'red', 'alpha':0.5, 'pad':10})
#    plt.legend(handles=[ green_patch])
#    nx.draw_networkx(G, node_color=node_colors, with_labels=True)
    
    #nx.draw_networkx(G) 
    #save the result  semiSparseRep 
    print "image name is" + imageName 
    plt.savefig(imageName);
Example 4
 def shortest_path(self, target):
     
     #checkmark 1
     d0 = time.clock()
     dict_links = {self.url[24:]:WikiWeb(self.url).links()}
     links = WikiWeb(self.url).links()
     wiki = 'https://en.wikipedia.org'
     print(time.clock()-d0)
     
     #checkmark 2
     d0 = time.clock()
     count=0
     while target[24:] not in links:
         link = links[count]
         dict_links.update({link:WikiWeb(wiki+link).links()})
         for link1 in dict_links[link]:
             if link1 not in links:
                 links.append(link1)
         count+=1
     print(time.clock()-d0)
     
     #checkmark 3
     d0 = time.clock()
     gr = nx.from_dict_of_lists(dict_links)
     sp = nx.shortest_path(gr, self.url[24:], target[24:])
     print(time.clock()-d0)
     
     return sp
     
Example 5
def from_ajacency_map(amap, directed = False):
    """
        Turns a map of adjacencies into a graph.

        amap: Adjacency Dict
        directed: If set to True, a directed graph will be created.
    """
    return nx.from_dict_of_lists(amap, nx.DiGraph() if directed else nx.Graph())
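A minimal usage sketch of the wrapper above; the adjacency dict is made up for illustration, and the only API used besides the wrapper is networkx itself:

import networkx as nx

amap = {0: [1, 2], 1: [2]}                      # hypothetical adjacency dict
g = from_ajacency_map(amap)                     # undirected nx.Graph
dg = from_ajacency_map(amap, directed=True)     # nx.DiGraph
print(g.number_of_edges(), dg.number_of_edges())  # 3 3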
Example 6
def parse_graph(file_string):
    # Open the file and decode the json information
    with open(file_string, 'r') as f:
        data = json.load(f)

    # Create a networkx graph from our adjacency list data
    G = nx.from_dict_of_lists(data)

    return G
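A hedged usage sketch: the JSON file is assumed to hold a plain adjacency-list mapping (node -> list of neighbours), which is the shape nx.from_dict_of_lists expects; the file name below is made up:

import json
import networkx as nx

# Write a tiny adjacency list to disk, then parse it back into a graph.
with open('example_graph.json', 'w') as f:
    json.dump({"a": ["b", "c"], "b": ["c"]}, f)

G = parse_graph('example_graph.json')
print(G.number_of_nodes(), G.number_of_edges())  # 3 3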
Example 7
def neighbourhoods(distribution, areal_units, classes=None):
    """ Return the neighbourhoods where different classes gather

    Parameters
    ----------

    distribution: nested dictionaries
        Number of people per class, per areal unit as given in the raw data
        (ungrouped). The dictionary must have the following formatting:
        > {areal_id: {class_id: number}}

    areal_units: dictionary
        Dictionary mapping areal unit ids to shapely polygon objects
        representing the unit's geometry.

    classes: dictionary of lists
        When the original categories need to be aggregated into different
        classes. 
        > {class: [categories belonging to this class]}
        This can be arbitrarily imposed, or computed with uncover_classes
        function of this package.

    Returns
    -------

    neighbourhoods: dictionary
        Dictionary mapping class names to lists of neighbourhoods (each
        represented by a list of areal units)
        > {'class': [ [areal units in cluster i], ...]}
    """

    # Regroup into classes if specified. Otherwise return categories indicated
    # in the data
    if not classes:
       classes = return_categories(distribution) 

    ## Find the areal units where classes are overrepresented
    or_units = overrepresented_units(distribution, classes)
    
    ## Compute the adjacency list
    adjacency = _adjacency(areal_units)

    ## Extract neighbourhoods as connected components
    G = nx.from_dict_of_lists(adjacency) # Graph from adjacency
    neighbourhoods = {cl: [list(subgraph) for subgraph in
                            nx.connected_component_subgraphs(G.subgraph(or_units[cl]))]
                        for cl in classes}

    return neighbourhoods
Example 8
def load_data(dataset_str):
    """Load data."""
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
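The `graph` object these loaders unpickle is an adjacency dict of the form {index: [indices of neighbour nodes]} (see the docstrings in Examples 14 and 27). A small sketch of the conversion step they all share, with a toy dict standing in for the pickled one:

import networkx as nx

toy_graph = {0: [1, 2], 1: [0], 2: [0, 3], 3: [2]}   # stand-in for ind.<name>.graph
adj = nx.adjacency_matrix(nx.from_dict_of_lists(toy_graph))
print(adj.shape)      # (4, 4) sparse matrix
print(adj.todense())  # symmetric 0/1 matrix, since the graph is undirected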
Example 9
import numpy as np
import networkx as nx


def edges_to_matrix(filename):
    data = np.loadtxt(filename)

    adjdict = dict()

    data = data.astype(int)
    for i in xrange(data.shape[0]):
        if adjdict.has_key(data[i,0]):
            adjdict[data[i,0]].append(data[i,1])
        else:
            adjdict[data[i,0]] = [data[i,1]]
    for i in xrange(data.shape[0]):
        if not adjdict.has_key(data[i,1]):
            adjdict[data[i,1]] = []

    nodes_set = set()
    for i in xrange(data.shape[0]):
        for j in xrange(data.shape[1]):
            nodes_set.add(data[i,j])
    nodes_list = list(nodes_set)
    values = dict()
    for i in xrange(len(nodes_list)):
        values[nodes_list[i]] = i

    refactored = dict()

    for node in adjdict.keys():
        edges = adjdict[node]
        refactored_edges = []
        for e in edges:
            refactored_edges.append(values[e])
        refactored[values[node]] = refactored_edges

    G = nx.from_dict_of_lists(refactored, create_using=nx.DiGraph())

    n = len(G.nodes())
    A = np.zeros((n,n))
    for u in G.nodes():
        for v in refactored[u]:
            A[u,v] = 1

    return A

Example 10
def game_from_file(filename):

	game = Game()
	game.network = nx.from_dict_of_lists(json.loads(open(filename).read()))


	# We split up the graphname, to get
	# -the number of players,
	# -the number of seed per player,
	# -the graph id.
	basename = os.path.basename(filename)

	num_list = map(int, basename.split(".")[:3])

	game.num_players = num_list[0]
	game.num_seeds = num_list[1]
	game.id = num_list[2]

	return game
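A hedged illustration of the file-name convention described in the comments above (num_players.num_seeds.graph_id.json); the path is hypothetical:

import os

basename = os.path.basename('graphs/2.10.3.json')   # 2 players, 10 seeds, graph id 3
num_players, num_seeds, graph_id = [int(part) for part in basename.split('.')[:3]]
print(num_players, num_seeds, graph_id)  # 2 10 3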
Example 11
def load_data(dataset):
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for i in range(len(names)):
        objects.append(pkl.load(open("data/ind.{}.{}".format(dataset, names[i]))))
    x, tx, allx, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
    tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
    tx_extended[test_idx_range-min(test_idx_range), :] = tx
    tx = tx_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    return adj, features
Example 12
def get_networkx_graph_from_array(binary_arr):
    """
    Return a networkx graph from a binary numpy array
    Parameters
    ----------
    binary_arr : numpy array
        binary numpy array can only be 2D Or 3D

    Returns
    -------
    networkx_graph : Networkx graph
        graphical representation of the input array after clique removal
    """
    assert np.max(binary_arr) in [0, 1], "input must always be a binary array"
    start = time.time()
    dict_of_indices_and_adjacent_coordinates = _set_adjacency_list(binary_arr)
    networkx_graph = nx.from_dict_of_lists(dict_of_indices_and_adjacent_coordinates)
    _remove_clique_edges(networkx_graph)
    print("time taken to obtain networkxgraph is %0.3f seconds" % (time.time() - start))
    return networkx_graph
Example 13
def load_data(dataset):
    # load the data: x, tx, allx, graph
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for i in range(len(names)):
        objects.append(pkl.load(open("data/ind.{}.{}".format(dataset, names[i]))))
    x, tx, allx, graph = tuple(objects)

    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    return adj, features
Example 14
    def process(self):
        """Loads input data from data directory

        ind.name.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
        ind.name.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
        ind.name.allx => the feature vectors of both labeled and unlabeled training instances
            (a superset of ind.name.x) as scipy.sparse.csr.csr_matrix object;
        ind.name.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
        ind.name.ty => the one-hot labels of the test instances as numpy.ndarray object;
        ind.name.ally => the labels for instances in ind.name.allx as numpy.ndarray object;
        ind.name.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
            object;
        ind.name.test.index => the indices of test instances in graph, for the inductive setting as list object.
        """
        root = self.raw_path
        objnames = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(objnames)):
            with open("{}/ind.{}.{}".format(root, self.name, objnames[i]),
                      'rb') as f:
                objects.append(_pickle_load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)
        test_idx_reorder = _parse_index_file("{}/ind.{}.test.index".format(
            root, self.name))
        test_idx_range = np.sort(test_idx_reorder)

        if self.name == 'citeseer':
            # Fix citeseer dataset (there are some isolated nodes in the graph)
            # Find isolated nodes, add them as zero-vecs into the right position
            test_idx_range_full = range(min(test_idx_reorder),
                                        max(test_idx_reorder) + 1)
            tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
            tx_extended[test_idx_range - min(test_idx_range), :] = tx
            tx = tx_extended
            ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
            ty_extended[test_idx_range - min(test_idx_range), :] = ty
            ty = ty_extended

        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]
        graph = nx.DiGraph(nx.from_dict_of_lists(graph))

        onehot_labels = np.vstack((ally, ty))
        onehot_labels[test_idx_reorder, :] = onehot_labels[test_idx_range, :]
        labels = np.argmax(onehot_labels, 1)

        idx_test = test_idx_range.tolist()
        idx_train = range(len(y))
        idx_val = range(len(y), len(y) + 500)

        train_mask = generate_mask_tensor(
            _sample_mask(idx_train, labels.shape[0]))
        val_mask = generate_mask_tensor(_sample_mask(idx_val, labels.shape[0]))
        test_mask = generate_mask_tensor(
            _sample_mask(idx_test, labels.shape[0]))

        self._graph = graph
        g = from_networkx(graph)

        g.ndata['train_mask'] = train_mask
        g.ndata['val_mask'] = val_mask
        g.ndata['test_mask'] = test_mask
        g.ndata['label'] = F.tensor(labels)
        g.ndata['feat'] = F.tensor(_preprocess_features(features),
                                   dtype=F.data_type_dict['float32'])
        self._num_classes = onehot_labels.shape[1]
        self._labels = labels
        self._g = g

        if self.verbose:
            print('Finished data loading and preprocessing.')
            print('  NumNodes: {}'.format(self._g.number_of_nodes()))
            print('  NumEdges: {}'.format(self._g.number_of_edges()))
            print('  NumFeats: {}'.format(self._g.ndata['feat'].shape[1]))
            print('  NumClasses: {}'.format(self.num_classes))
            print('  NumTrainingSamples: {}'.format(
                F.nonzero_1d(self._g.ndata['train_mask']).shape[0]))
            print('  NumValidationSamples: {}'.format(
                F.nonzero_1d(self._g.ndata['val_mask']).shape[0]))
            print('  NumTestSamples: {}'.format(
                F.nonzero_1d(self._g.ndata['test_mask']).shape[0]))
Example 15
		else:
			print 'sciezka'
		print q
		if once:
			return True
	else:
		visited[v] = True
		for x in g[v]:
			if not visited[x]:
				found = hamiltonian_backend(g, x, visited, once)
				if found:
					return True
		visited[v] = False
	q.pop(-1)


def hamiltonian(g,once=False):
	v = [False] * len(g)
	return hamiltonian_backend(g, 1, v,once)



if __name__ == "__main__":
	hamiltonian(graph,once=True)
	G = nx.from_dict_of_lists(graph, create_using=nx.MultiDiGraph())
	nx.draw(G)
	plt.show()



Example 16
def load_data(dataset_str, train_size, validation_size = 500, timeseta = 3, validate = False, shuffle=True):
	"""Load data."""
	if dataset_str in ['USPS-Fea', 'CIFAR-Fea', 'Cifar_10000_fea', 'Cifar_R10000_fea', 'MNIST-Fea', 'MNIST-10000', 'MNIST-5000']:
		data = sio.loadmat('data/{}.mat'.format(dataset_str))
		l = data['labels'].flatten()
		labels = np.zeros([l.shape[0],np.max(data['labels'])+1])
		labels[np.arange(l.shape[0]), l.astype(np.int8)] = 1
		features = data['X']
		sample = features[0].copy()
		adj = data['G']
	else:
		names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
		objects = []
		for i in range(len(names)):
			with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
				if sys.version_info > (3, 0):
					objects.append(pkl.load(f, encoding='latin1'))
				else:
					objects.append(pkl.load(f))

		x, y, tx, ty, allx, ally, graph = tuple(objects)
		adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
		test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
		test_idx_range = np.sort(test_idx_reorder)

		if dataset_str == 'citeseer':
			test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
			tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
			tx_extended[test_idx_range - min(test_idx_range), :] = tx
			tx = tx_extended
			ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
			ty_extended[test_idx_range - min(test_idx_range), :] = ty
			ty = ty_extended

		features = sp.vstack((allx, tx)).tolil()
		# features = sp.eye(features.shape[0]).tolil()
		# features = sp.lil_matrix(allx)

		labels = np.vstack((ally, ty))
		# labels = np.vstack(ally)

		features[test_idx_reorder, :] = features[test_idx_range, :]
		labels[test_idx_reorder, :] = labels[test_idx_range, :]
		features = preprocess_features(features)

	global all_labels
	all_labels = labels.copy()

	# split the data set
	idx = np.arange(len(labels))
	no_class = labels.shape[1]  # number of class
	train_size = [train_size for i in range(labels.shape[1])]
	if shuffle:
		np.random.shuffle(idx)
	idx_train = []
	count = [0 for i in range(no_class)]
	label_each_class = train_size
	next = 0
	for i in idx:
		if count == label_each_class:
			break
		next += 1
		for j in range(no_class):
			if labels[i, j] and count[j] < label_each_class[j]:
				idx_train.append(i)
				count[j] += 1

	test_size = None
	if validate:
		if test_size:
			assert next+validation_size<len(idx)
		idx_val = idx[next:next+validation_size]
		assert next+validation_size+test_size < len(idx)
		idx_test = idx[-test_size:] if test_size else idx[next+validation_size:]

	else:
		if test_size:
			assert next+test_size<len(idx)
		idx_val = idx[-test_size:] if test_size else idx[next:]
		idx_test = idx[-test_size:] if test_size else idx[next:]

	print('labels of each class : ', np.sum(labels[idx_train], axis=0))
	
	eta = np.float(adj.shape[0])/(np.float(adj.sum())/adj.shape[0])**2
	t = (labels[idx_train].sum(axis=0)*timeseta*eta/labels[idx_train].sum()).astype(np.int64)
	
	features = torch.FloatTensor(np.array(features.todense()))
	labels = torch.LongTensor(np.argmax(labels,1))
	adj = adj + sp.eye(adj.shape[0])
	adj = normalize_adj(adj)
	adj = sparse_mx_to_torch_sparse_tensor(adj)
	idx_train = torch.LongTensor(idx_train)
	idx_val = torch.LongTensor(idx_val)
	idx_test = torch.LongTensor(idx_test)

	return adj, features, labels, idx_train, idx_val, idx_test, t
Example 17
def load_data_vis_multi(dataset_str, use_trainval, X_dense_file, train_y_file, graph_file, test_index_file):
    """Load data."""
    names = [X_dense_file, train_y_file, graph_file]
    objects = []
    for name in names:
        with open(os.path.join(dataset_str, name), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    allx, ally, graph = tuple(objects)
    with open(os.path.join(dataset_str, test_index_file), 'rb') as f:
        train_test_mask = pkl.load(f)

    features = allx
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    labels = np.array(ally)

    idx_test = []
    idx_train = []
    idx_trainval = []

    if use_trainval:
        for i in range(len(train_test_mask)):

            if train_test_mask[i] == 0:
                idx_train.append(i)
            if train_test_mask[i] == 1:
                idx_test.append(i)

            if train_test_mask[i] >= 0:
                idx_trainval.append(i)
    else:
        for i in range(len(train_test_mask)):

            if train_test_mask[i] >= 0:
                idx_train.append(i)
            if train_test_mask[i] == 1:
                idx_test.append(i)

            if train_test_mask[i] >= 0:
                idx_trainval.append(i)

    idx_val = idx_test

    train_mask = sample_mask_sigmoid(idx_train, labels.shape[0], labels.shape[1])
    train_adj_mask = sample_mask_sigmoid(idx_train, labels.shape[0], labels.shape[0])

    val_mask = sample_mask_sigmoid(idx_val, labels.shape[0], labels.shape[1])
    val_adj_mask = sample_mask_sigmoid(idx_val, labels.shape[0], labels.shape[0])

    trainval_mask = sample_mask_sigmoid(idx_trainval, labels.shape[0], labels.shape[1])
    trainval_adj_mask = sample_mask_sigmoid(idx_trainval, labels.shape[0], labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_trainval = np.zeros(labels.shape)

    y_train[train_mask] = labels[train_mask]
    y_val[val_mask] = labels[val_mask]
    y_trainval[trainval_mask] = labels[trainval_mask]

    return adj, features, y_train, train_mask, train_adj_mask, val_mask, val_adj_mask, trainval_mask, trainval_adj_mask
Example 18
def load_data(dataset="cora", modified=False, attacked=False):
    """
    Load Citation Networks Datasets.
    """
    path = '../LAGCN/'
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    if modified:
        names[-1] = 'graph_lite'
    if attacked:
        names[-1] = 'graph_attack'
    print(names[-1])
    objects = []
    for i in range(len(names)):
        with open(path + "data/ind.{}.{}".format(dataset.lower(), names[i]),
                  'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        path + "data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    # adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = normalize(features)
    adj = normalize(adj + sp.eye(adj.shape[0]))

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(labels)
    labels = torch.max(labels, dim=1)[1]
    # labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)
    return adj, features, labels, idx_train, idx_val, idx_test
Example 19
def load_citation(dataset_str="cora", normalization="AugNormAdj", cuda=True):
    """
    Load Citation Networks Datasets.
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("../data/ind.{}.{}".format(dataset_str.lower(), names[i]),
                  'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "../data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    adj, features = preprocess_citation(adj, features, normalization)

    # porting to pytorch
    features = torch.FloatTensor(np.array(features.todense())).float()
    labels = torch.LongTensor(labels)
    labels = torch.max(labels, dim=1)[1]
    adj = sparse_mx_to_torch_sparse_tensor(adj).float()
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    if cuda:
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    return adj, features, labels, idx_train, idx_val, idx_test
Example 20
def adj_lists_to_directed_graph(adjacency_lists):
    """Turns a dict of lists of nodes to a directed graph"""
    return nx.from_dict_of_lists(adjacency_lists, create_using=nx.DiGraph())
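A minimal usage sketch (the adjacency dict is invented for illustration):

import networkx as nx

adj_lists = {'a': ['b', 'c'], 'b': ['c']}
dg = adj_lists_to_directed_graph(adj_lists)
print(sorted(dg.edges()))  # [('a', 'b'), ('a', 'c'), ('b', 'c')]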
Example 21
def load_data(dataset_str,
              train_size,
              validation_size,
              model_config,
              shuffle=True,
              repeat_state=None):
    """Load data."""
    if train_size == 'public':
        return load_public_split_data(dataset_str)
    if dataset_str in ['large_cora']:
        data = sio.loadmat('data/{}.mat'.format(dataset_str))
        l = data['labels'].flatten()
        labels = np.zeros([l.shape[0], np.max(l) + 1])
        labels[np.arange(l.shape[0]), l.astype(np.int8)] = 1
        features = data['X']
        adj = data['G']
    else:
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(names)):
            with open("data/ind.{}.{}".format(dataset_str, names[i]),
                      'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pkl.load(f, encoding='latin1'))
                else:
                    objects.append(pkl.load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)
        adj = nx.to_scipy_sparse_matrix(nx.from_dict_of_lists(graph))
        # adj = sp.csr_matrix(adj)
        test_idx_reorder = parse_index_file(
            "data/ind.{}.test.index".format(dataset_str))
        test_idx_range = np.sort(test_idx_reorder)

        if dataset_str == 'citeseer':
            # Fix citeseer dataset (there are some isolated nodes in the graph)
            # Find isolated nodes, add them as zero-vecs into the right position
            test_idx_range_full = range(min(test_idx_reorder),
                                        max(test_idx_reorder) + 1)
            tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
            tx_extended[test_idx_range - min(test_idx_range), :] = tx
            tx = tx_extended
            ty_extended = np.zeros((len(test_idx_range_full), ty.shape[1]))
            ty_extended[test_idx_range - min(test_idx_range), :] = ty
            ty = ty_extended

        features = sp.vstack((allx, tx)).tolil()
        labels = np.vstack((ally, ty))

        if dataset_str.startswith('nell'):
            # Find relation nodes, add them as zero-vecs into the right position
            test_idx_range_full = range(allx.shape[0], len(graph))
            isolated_node_idx = np.setdiff1d(test_idx_range_full,
                                             test_idx_reorder)
            tx_extended = sp.lil_matrix(
                (len(test_idx_range_full), tx.shape[1]))
            tx_extended[test_idx_range - allx.shape[0], :] = tx
            tx = tx_extended
            ty_extended = np.zeros((len(test_idx_range_full), ty.shape[1]))
            ty_extended[test_idx_range - allx.shape[0], :] = ty
            ty = ty_extended

            features = sp.vstack((allx, tx)).tolil()
            features[test_idx_reorder, :] = features[test_idx_range, :]
            labels = np.vstack((ally, ty))
            labels[test_idx_reorder, :] = labels[test_idx_range, :]

            idx_all = np.setdiff1d(range(len(graph)), isolated_node_idx)

            if not os.path.isfile("data/{}.features.npz".format(dataset_str)):
                print(
                    "Creating feature vectors for relations - this might take a while..."
                )
                features_extended = sp.hstack(
                    (features,
                     sp.lil_matrix(
                         (features.shape[0], len(isolated_node_idx)))),
                    dtype=np.int32).todense()
                features_extended[isolated_node_idx,
                                  features.shape[1]:] = np.eye(
                                      len(isolated_node_idx))
                features = sp.csr_matrix(features_extended, dtype=np.float32)
                print("Done!")
                save_sparse_csr("data/{}.features".format(dataset_str),
                                features)
            else:
                features = load_sparse_csr(
                    "data/{}.features.npz".format(dataset_str))
            idx_train = np.arange(x.shape[0])
            idx_test = test_idx_reorder
            if model_config['validate']:
                assert x.shape[0] + validation_size < allx.shape[0] + tx.shape[
                    0]
                idx_val = np.arange(x.shape[0], x.shape[0] + validation_size)
            else:
                idx_val = test_idx_reorder

            train_mask = sample_mask(idx_train, labels.shape[0])
            val_mask = sample_mask(idx_val, labels.shape[0])
            test_mask = sample_mask(idx_test, labels.shape[0])

            y_train = np.zeros(labels.shape)
            y_val = np.zeros(labels.shape)
            y_test = np.zeros(labels.shape)
            y_train[train_mask, :] = labels[train_mask, :]
            y_val[val_mask, :] = labels[val_mask, :]
            y_test[test_mask, :] = labels[test_mask, :]

            return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask

        features[test_idx_reorder, :] = features[test_idx_range, :]
        labels[test_idx_reorder, :] = labels[test_idx_range, :]
        features = preprocess_features(features,
                                       feature_type=model_config['feature'])

    # split the data set
    idx_train, idx_val, idx_test = split_dataset(
        labels,
        train_size,
        model_config['test_size'],
        validation_size,
        validate=model_config['validate'],
        shuffle=shuffle)

    if model_config['verbose']:
        print('labels of each class : ', np.sum(labels[idx_train], axis=0))

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    size_of_each_class = np.sum(labels[idx_train], axis=0)

    features = features.astype(np.float32)

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
Example 22
def build_edge_index_nx(adjacency_list_dict):
    nx_graph = nx.from_dict_of_lists(adjacency_list_dict)
    adj = nx.adjacency_matrix(nx_graph)
    adj = adj.tocoo()  # convert to COO (COOrdinate sparse format)

    return np.row_stack((adj.row, adj.col))
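A hedged usage sketch with a toy adjacency list; each undirected edge shows up twice in the result, because the symmetric adjacency matrix has two non-zero entries per edge:

import numpy as np
import networkx as nx

adjacency_list_dict = {0: [1, 2], 1: [2]}   # toy input
edge_index = build_edge_index_nx(adjacency_list_dict)
print(edge_index.shape)  # (2, 6): 3 undirected edges, each counted in both directions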
Example 23

# ## Generate graph from edges and node data ##

# Read edges.csv and make a network out of it
edges = defaultdict(list)
with open('datasets/BlogCatalog-dataset/data/edges.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if crop is not None:
            if int(row[0]) in nodes and int(row[1]) in nodes:
                edges[int(row[0])].append(int(row[1]))
        else:
            edges[int(row[0])].append(int(row[1]))

g = nx.from_dict_of_lists(edges, create_using=nx.Graph())
if crop is not None:
    g.add_nodes_from(nodes)

# Read group-edges.csv and add that info to each node
group_edges = defaultdict(list)
with open('datasets/BlogCatalog-dataset/data/group-edges.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if crop is not None:
            if int(row[0]) in nodes:
                group_edges[int(row[0])].append(int(row[1]))
        else:
            group_edges[int(row[0])].append(int(row[1]))

for node, data in g.nodes.items():
Example 24
def dict_to_adj(the_dict, directed=True):
    if directed:
        graph = nx.from_dict_of_lists(the_dict, create_using=nx.DiGraph())
    else:
        graph = nx.from_dict_of_lists(the_dict)
    return nx.adjacency_matrix(graph, nodelist=sorted(graph.nodes()))
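A small usage sketch, assuming node labels that sort cleanly (the dict below is a toy 3-cycle):

import networkx as nx

the_dict = {0: [1], 1: [2], 2: [0]}
A_directed = dict_to_adj(the_dict)                  # default: directed
A_undirected = dict_to_adj(the_dict, directed=False)
print(A_directed.todense())    # asymmetric 0/1 matrix
print(A_undirected.todense())  # symmetric 0/1 matrix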
Example 25
def load_citation_data(dataset_str, use_feats, data_path, split_seed=None):
    if dataset_str[:3] == 'my_':
        names1 = ['adj_matrix.npz', 'attr_matrix.npz']
        names2 = [
            'label_matrix.npy', 'train_mask.npy', 'val_mask.npy',
            'test_mask.npy'
        ]
        objects = []
        for tmp_name in names1:
            tmp_path = 'data/{}/{}.{}'.format(dataset_str, dataset_str,
                                              tmp_name)
            objects.append(sp.load_npz(tmp_path))
        for tmp_name in names2:
            tmp_path = 'data/{}/{}.{}'.format(dataset_str, dataset_str,
                                              tmp_name)
            objects.append(np.load(tmp_path))
        adj, features, label_matrix, train_mask, val_mask, test_mask = tuple(
            objects)

        labels = np.argmax(label_matrix, 1)

        arr = np.arange(len(train_mask))
        idx_train = list(arr[train_mask])
        idx_val = list(arr[val_mask])
        idx_test = list(arr[test_mask])
        return adj, features, labels, idx_train, idx_val, idx_test

    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open(
                os.path.join(data_path,
                             "ind.{}.{}".format(dataset_str, names[i])),
                'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        os.path.join(data_path, "ind.{}.test.index".format(dataset_str)))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    labels = np.argmax(labels, 1)

    idx_test = test_idx_range.tolist()
    idx_train = list(range(len(y)))
    idx_val = range(len(y), len(y) + 500)

    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    if not use_feats:
        features = sp.eye(adj.shape[0])
    return adj, features, labels, idx_train, idx_val, idx_test
Example 26
def get_mlp_embeddings(**kwargs):
    data = kwargs.get('data')
    vocab = kwargs.get('vocab')
    clf = MLP(n_epochs=50,
              batch_size=10000,
              init_parameters=None,
              complete_prob=False,
              add_hidden=True,
              regul_coefs=[1e-6, 1e-6],
              save_results=False,
              hidden_layer_size=2048,
              drop_out=True,
              drop_out_coefs=[0.5, 0.5],
              early_stopping_max_down=5,
              loss_name='log',
              nonlinearity='rectify')
    metainfo, X_train, Y_train, U_train, X_dev, Y_dev, U_dev, X_test, Y_test, U_test, classLatMedian, classLonMedian, userLocation, vectorizer = data
    convolution = False
    if convolution:
        logging.info('loading graph...')
        with open('/home/arahimi/git/jointgeo/data/trans.cmu.graph',
                  'rb') as fin:
            dev_graph = pickle.load(fin)
        '''
        dev_graph_indices = xrange(X_train.shape[0], X_train.shape[0] + X_dev.shape[0])
        X_test = X_test.tolil()
        for i in dev_graph_indices:
            nbrs = dev_graph[i]
            dev_index = i - X_train.shape[0]
            count = 1
            for nbr in nbrs:
                if nbr < X_train.shape[0]:
                    X_test[i - X_train.shape[0], :] += X_train[nbr, :]
                    count += 1
            X_test[i - X_train.shape[0], :] /= count
        X_test = X_test.tocsr().astype('float32')
        '''
        for i in range(0, X_train.shape[0] + X_dev.shape[0]):
            dev_graph[i].append(i)
        logging.info('creating adjacency matrix...')
        adj = nx.adjacency_matrix(nx.from_dict_of_lists(dev_graph))
        adj.setdiag(1)
        pdb.set_trace()
        logging.info('normalizing adjacency matrix...')
        normalize(adj, axis=1, norm='l1', copy=False)
        adj = adj.astype('float32')
        logging.info('vstacking...')
        X = sp.sparse.vstack([X_train, X_test])
        logging.info('convolution...')
        X_conv = adj * X
        X_conv = X_conv.tocsr().astype('float32')
        #X_train = X_conv[0:X_train.shape[0], :]
        X_test = X_conv[X_train.shape[0]:, :]

    clf.fit(X_train, Y_train, X_dev, Y_dev)
    print('Test classification accuracy is %f' % clf.accuracy(X_test, Y_test))
    y_pred = clf.predict(X_test)
    geo_eval(Y_test, y_pred, U_test, classLatMedian, classLonMedian,
             userLocation)
    print('Dev classification accuracy is %f' % clf.accuracy(X_dev, Y_dev))
    y_pred = clf.predict(X_dev)
    geo_eval(Y_dev, y_pred, U_dev, classLatMedian, classLonMedian,
             userLocation)

    X_dare = vectorizer.transform(vocab)
    X_dare = X_dare.astype('float32')
    mlp_embeddings = clf.get_embedding(X_dare)
    return vocab, mlp_embeddings
Example 27
def load_data(dataset_str):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("../dataset/ind.{}.{}".format(dataset_str, names[i]),
                  'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "../dataset/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    # preprocess feature
    features = preprocess_features(features)
    features = torch.tensor(features, dtype=torch.float32)
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    # preprocess adj
    adj = sparse_mx_to_torch_sparse_tensor(adj).to_dense()
    # adj = torch_normalize_adj(adj)
    # adj2 = preprocess_adj(adj)
    # adj2 = sparse_mx_to_torch_sparse_tensor(adj2).to_dense()
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    _, l_num = labels.shape
    labels = torch.tensor((labels * range(l_num)).sum(axis=1),
                          dtype=torch.int64)

    idx_test = test_idx_range.tolist()
    idx_train = list(range(len(y)))
    idx_val = list(range(len(y), len(y) + 500))

    return adj, features, labels, idx_train, idx_val, idx_test
Example 28
def load_data(dataset_name='cora', normalize_features=True):
    """
    Loads a citation dataset using the public splits as defined in
    [Kipf & Welling (2016)](https://arxiv.org/abs/1609.02907).
    :param dataset_name: name of the dataset to load ('cora', 'citeseer', or
    'pubmed');
    :param normalize_features: if True, the node features are normalized;
    :return: the citation network in numpy format, with train, test, and
    validation splits for the targets and masks.
    """
    if dataset_name not in AVAILABLE_DATASETS:
        raise ValueError('Available datasets: {}'.format(AVAILABLE_DATASETS))

    if not os.path.exists(DATA_PATH + dataset_name):
        download_data(dataset_name)

    print('Loading {} dataset'.format(dataset_name))

    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    data_path = os.path.join(DATA_PATH, dataset_name)
    for n in names:
        filename = "{}/ind.{}.{}".format(data_path, dataset_name, n)
        objects.append(load_binary(filename))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    test_idx_reorder = _parse_index_file("{}/ind.{}.test.index".format(
        data_path, dataset_name))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_name == 'citeseer':
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = _sample_mask(idx_train, labels.shape[0])
    val_mask = _sample_mask(idx_val, labels.shape[0])
    test_mask = _sample_mask(idx_test, labels.shape[0])

    # Row-normalize the features
    if normalize_features:
        print('Pre-processing node features')
        features = preprocess_features(features)

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, labels
Example 29
def load_graph_data(training_config, device):
    dataset_name = training_config['dataset_name'].lower()
    layer_type = training_config['layer_type']
    should_visualize = training_config['should_visualize']

    if dataset_name == DatasetType.CORA.name.lower():

        # shape = (N, FIN), where N is the number of nodes and FIN is the number of input features
        node_features_csr = pickle_read(
            os.path.join(CORA_PATH, 'node_features.csr'))
        # shape = (N, 1)
        node_labels_npy = pickle_read(
            os.path.join(CORA_PATH, 'node_labels.npy'))
        # shape = (N, number of neighboring nodes) <- this is a dictionary not a matrix!
        adjacency_list_dict = pickle_read(
            os.path.join(CORA_PATH, 'adjacency_list.dict'))

        # Normalize the features
        node_features_csr = normalize_features_sparse(node_features_csr)
        num_of_nodes = len(node_labels_npy)

        if layer_type == LayerType.IMP3:
            # Build edge index explicitly (faster than nx ~100 times and as fast as PyGeometric imp but less complex)
            # shape = (2, E), where E is the number of edges, and 2 for source and target nodes. Basically edge index
            # contains tuples of the format S->T, e.g. 0->3 means that node with id 0 points to a node with id 3.
            topology = build_edge_index(adjacency_list_dict,
                                        num_of_nodes,
                                        add_self_edges=True)
        elif layer_type == LayerType.IMP2 or layer_type == LayerType.IMP1:
            # adjacency matrix shape = (N, N)
            topology = nx.adjacency_matrix(
                nx.from_dict_of_lists(adjacency_list_dict)).todense().astype(
                    np.float)
            topology += np.identity(topology.shape[0])  # add self connections
            topology[topology > 0] = 1  # multiple edges not allowed
            topology[
                topology ==
                0] = -np.inf  # make it a mask instead of adjacency matrix (used to mask softmax)
            topology[topology == 1] = 0
        else:
            raise Exception(f'Layer type {layer_type} not yet supported.')

        # Note: topology is just a fancy way of naming the graph structure data
        # (be it in the edge index format or adjacency matrix)

        if should_visualize:  # network analysis and graph drawing
            plot_in_out_degree_distributions(topology, num_of_nodes,
                                             dataset_name)
            visualize_graph(topology, node_labels_npy, dataset_name)

        # Convert to dense PyTorch tensors

        # Needs to be long int type (in implementation 3) because later functions like PyTorch's index_select expect it
        topology = torch.tensor(
            topology,
            dtype=torch.long if layer_type == LayerType.IMP3 else torch.float,
            device=device)
        node_labels = torch.tensor(
            node_labels_npy, dtype=torch.long,
            device=device)  # Cross entropy expects a long int
        node_features = torch.tensor(node_features_csr.todense(),
                                     device=device)

        # Indices that help us extract nodes that belong to the train/val and test splits
        train_indices = torch.arange(CORA_TRAIN_RANGE[0],
                                     CORA_TRAIN_RANGE[1],
                                     dtype=torch.long,
                                     device=device)
        val_indices = torch.arange(CORA_VAL_RANGE[0],
                                   CORA_VAL_RANGE[1],
                                   dtype=torch.long,
                                   device=device)
        test_indices = torch.arange(CORA_TEST_RANGE[0],
                                    CORA_TEST_RANGE[1],
                                    dtype=torch.long,
                                    device=device)

        return node_features, node_labels, topology, train_indices, val_indices, test_indices
    elif dataset_name == DatasetType.PPI.name.lower():
        # Instead of checking it in, I'd rather download it on-the-fly the first time it's needed (lazy execution ^^)
        if not os.path.exists(PPI_PATH):
            os.makedirs(PPI_PATH)

            # Step 1: Download the ppi.zip (contains the PPI dataset)
            zip_tmp_path = os.path.join(PPI_PATH, 'ppi.zip')
            download_url_to_file(PPI_URL, zip_tmp_path)

            # Step 2: Unzip it
            with zipfile.ZipFile(zip_tmp_path) as zf:
                zf.extractall(path=PPI_PATH)
            print(f'Unzipping to: {PPI_PATH} finished.')

            # Step3: Remove the temporary resource file
            os.remove(zip_tmp_path)
            print(f'Removing tmp file {zip_tmp_path}.')

        # todo: load PPI
        raise Exception(f'{dataset_name} not yet supported.')
    else:
        raise Exception(f'{dataset_name} not yet supported.')
Example 30
def load_citation(dataset_str="cora", normalization="AugNormAdj", cuda=True):
    """
    Load Citation Networks Datasets.
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str.lower(), names[i]),
                  'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    if dataset_str == 'citeseer':
        idx_test = torch.LongTensor(test_idx_range.tolist())  #[1708, 2707]
        idx_train = torch.LongTensor(range(len(y)))  #[0,140)
        idx_val = torch.LongTensor(range(len(y), len(y) + 500))  #[140,640)
    else:
        ### setting for cora
        ### take from https://github.com/tkipf/pygcn/blob/master/pygcn/utils.py
        idx_train = range(140)
        idx_val = range(200, 500)
        idx_test = range(500, 1500)
        idx_train = torch.LongTensor(idx_train)
        idx_val = torch.LongTensor(idx_val)
        idx_test = torch.LongTensor(idx_test)
    labels = torch.LongTensor(labels)
    labels = torch.max(labels, dim=1)[1]
    features = normalize(features)
    A_tilde = normalize_adjacency_matrix(adj, sp.eye(adj.shape[0]))
    adj_p = normalizemx(adj)
    features = torch.FloatTensor(np.array(features.todense()))
    print('Loading')
    adj_sct1 = scattering1st(adj_p, 1)
    print('SCT 1 done')
    print('Loading')
    adj_sct2 = scattering1st(adj_p, 2)
    print('SCT 2 done')
    adj_sct4 = scattering1st(adj_p, 4)
    print('SCT 4 done')
    adj_p = sparse_mx_to_torch_sparse_tensor(adj_p)
    A_tilde = sparse_mx_to_torch_sparse_tensor(A_tilde)
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    return adj, adj_p, A_tilde, adj_sct1, adj_sct2, adj_sct4, features, labels, idx_train, idx_val, idx_test
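
Several of the loaders in these examples call parse_index_file and sample_mask without defining them. In the reference GCN utilities they are small helpers along the following lines (a sketch, assuming numpy is imported as np):

def parse_index_file(filename):
    """Read one integer index per line."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def sample_mask(idx, l):
    """Boolean mask of length l that is True at the positions listed in idx."""
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=np.bool_)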
Esempio n. 31
0
def load_citation_data(cfg):
    """
    (DCMMC) The GitHub repo of planetoid (Zhilin Yang, William W. Cohen, Ruslan Salakhutdinov, Revisiting Semi-Supervised Learning with Graph Embeddings, ICML 2016) provides a preprocessed Cora dataset and a fixed split.

    Copied from gcn
    citeseer/cora/pubmed with gcn split
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using python pickle module.

    :param cfg: config dict; uses cfg['citation_root'], cfg['activate_dataset'] and cfg['add_self_loop']
    :return: All data input files loaded (as well as the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open(
                "{}/ind.{}.{}".format(cfg['citation_root'],
                                      cfg['activate_dataset'], names[i]),
                'rb') as f:
            objects.append(pkl.load(f, encoding='latin1'))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("{}/ind.{}.test.index".format(
        cfg['citation_root'], cfg['activate_dataset']))
    test_idx_range = np.sort(test_idx_reorder)

    if cfg['activate_dataset'] == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    features = preprocess_features(features)
    features = features.todense()

    G = nx.from_dict_of_lists(graph)
    edge_list = G.adjacency_list()

    degree = [0] * len(edge_list)
    if cfg['add_self_loop']:
        for i in range(len(edge_list)):
            edge_list[i].append(i)
            degree[i] = len(edge_list[i])
    max_deg = max(degree)
    mean_deg = sum(degree) / len(degree)
    print(f'max degree: {max_deg}, mean degree: {mean_deg}')

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]  # one-hot labels
    n_sample = labels.shape[0]
    n_category = labels.shape[1]
    lbls = np.zeros((n_sample, ))
    if cfg['activate_dataset'] == 'citeseer':
        n_category += 1  # one-hot labels all zero: new category
        for i in range(n_sample):
            try:
                lbls[i] = np.where(labels[i] == 1)[0]  # numerical labels
            except ValueError:  # labels[i] all zeros
                lbls[i] = n_category + 1  # new category
    else:
        for i in range(n_sample):
            lbls[i] = np.where(labels[i] == 1)[0]  # numerical labels

    idx_test = test_idx_range.tolist()
    idx_train = list(range(len(y)))
    idx_val = list(range(len(y), len(y) + 500))
    return features, lbls, idx_train, idx_val, idx_test, n_category, edge_list, edge_list
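
Note that G.adjacency_list() above exists only in networkx 1.x and was removed in networkx 2.0. On newer versions, an equivalent edge list (assuming the planetoid convention that nodes are the integers 0..N-1) could be built as:

    edge_list = [list(G.neighbors(i)) for i in range(G.number_of_nodes())]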
Esempio n. 32
0
def load_data_vis_multi(dataset_str, use_trainval, feat_suffix, label_suffix='ally_multi'):
    """Load data."""
    names = [feat_suffix, label_suffix, 'graph']
    objects = []
    for i in range(len(names)):
        with open("{}/ind.NELL.{}".format(dataset_str, names[i]), 'rb') as f:
            print("{}/ind.NELL.{}".format(dataset_str, names[i]))
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    allx, ally, graph = tuple(objects)
    train_test_mask = []
    with open("{}/ind.NELL.index".format(dataset_str), 'rb') as f:
        train_test_mask = pkl.load(f)

    features = allx  # .tolil()
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    labels = np.array(ally)

    idx_test = []
    idx_train = []
    idx_trainval = []

    if use_trainval == True:
        for i in range(len(train_test_mask)):

            if train_test_mask[i] == 0:
                idx_train.append(i)
            if train_test_mask[i] == 1:
                idx_test.append(i)

            if train_test_mask[i] >= 0:
                idx_trainval.append(i)
    else:
        for i in range(len(train_test_mask)):

            if train_test_mask[i] >= 0:
                idx_train.append(i)
            if train_test_mask[i] == 1:
                idx_test.append(i)

            if train_test_mask[i] >= 0:
                idx_trainval.append(i)

    idx_val = idx_test

    train_mask = sample_mask_sigmoid(idx_train, labels.shape[0], labels.shape[1])
    val_mask = sample_mask_sigmoid(idx_val, labels.shape[0], labels.shape[1])
    trainval_mask = sample_mask_sigmoid(idx_trainval, labels.shape[0], labels.shape[1])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_trainval = np.zeros(labels.shape)

    y_train[train_mask] = labels[train_mask]
    y_val[val_mask] = labels[val_mask]
    y_trainval[trainval_mask] = labels[trainval_mask]

    return adj, features, y_train, y_val, y_trainval, train_mask, val_mask, trainval_mask
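
sample_mask_sigmoid is not defined in this snippet; judging from how it is used (a 2-D mask that selects whole label rows), it is presumably something like this sketch:

def sample_mask_sigmoid(idx, h, w):
    """2-D mask with the rows in idx set to 1, so y[mask] = labels[mask] copies every label column of those rows."""
    mask = np.zeros((h, w))
    mask[idx, :] = 1
    return np.array(mask, dtype=np.bool_)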
Esempio n. 33
0
def prepare_data(dataset_dir,dataset_name):
    # data load
    # #To read data pickled under py2 from py3, the encoding must be specified
    # x=pickle.load(open('Data/citeseer/ind.citeseer.x','rb'),encoding='iso-8859-1')#(120, 3703)
    # y=pickle.load(open('Data/citeseer/ind.citeseer.y','rb'),encoding='iso-8859-1')#(120, 6), one-hot

    print("Loading raw data from files...")

    #dataset_dir = './Data'
    #dataset_name = 'citeseer'
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open(dataset_dir+"/ind.{}.{}".format(dataset_name, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pickle.load(f, encoding='latin1'))
            else:
                objects.append(pickle.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)

    print("Done.")

    print("Processing Data...")

    index = []
    for line in open(dataset_dir+"/ind.{0}.test.index".format(dataset_name)):
        index.append(int(line.strip()))

    test_idx_reorder = index
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_name == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)  # [2312,3326]
        tx_extended = sparse.lil_matrix((len(test_idx_range_full), x.shape[1]))  # shape=(1015,3703)
        tx_extended[test_idx_range - min(test_idx_range), :] = tx  # indices with no data get all-zero features.
        # Note the line above indexes with test_idx_range, so the rows of tx are not yet sorted by idx.
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))  # shape=(1015,6)
        ty_extended[test_idx_range - min(test_idx_range), :] = ty  # indices with no data get all-zero labels.
        ty = ty_extended

        # After this step, the rows of tx and ty whose row numbers match an idx hold the corresponding values.

    features = sparse.vstack([allx, tx]).toarray() #(3327,3703)
    features[test_idx_reorder, :] = features[test_idx_range, :]  # This guarantees that, within features, each test node's row number matches the index its values belong to.

    ### Normalize the features.
    features = row_normalize_safe(features)

    labels = np.vstack((ally, ty)) #(N,n_classes) , (3327,6)
    labels[test_idx_reorder, :] = labels[test_idx_range, :]  # likewise, align row numbers with indices.

    idx_test = test_idx_range.tolist() # This shows that the blank isolated nodes take no part in testing: their indices are not in this list.
    idx_train = range(len(y))  # range(120)
    idx_val = range(len(y), len(y) + 500)  # range(120,620)

    # In the citeseer dataset, nodes [0,119] are in x, the 2312 nodes [0,2311] are in allx, and the 1015 nodes [2312,3326] are in tx.
    # Unclear why the training set has 120 nodes while only 500 are set aside for validation.

    ### Graph structure
    # Using a networkx function here out of laziness; the downside is pulling in a whole package for a single function.
    # One could instead write a small helper that builds the adjacency matrix straight from the dict.
    adj_matrix = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    hatA = torch.FloatTensor(get_hatA(adj_matrix))#(3327,3327)

    print('Data processed to numpy.')
    # Everything so far is numpy; now convert to the forms the torch model expects

    n_classes = labels.shape[1] #(N,cls)
    labels = torch.LongTensor(labels).argmax(dim=1) #(N,)
    features = torch.FloatTensor(features) #(N,D)

    print('Data adjusted to torch.')

    return hatA,features,labels, n_classes,idx_train,idx_val,idx_test
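
get_hatA and row_normalize_safe are not shown in this snippet. get_hatA presumably applies the usual GCN renormalisation hatA = D^(-1/2) (A + I) D^(-1/2); a sketch using the same imports as above (scipy.sparse as sparse, numpy as np):

def get_hatA(adj):
    """Renormalised adjacency D^(-1/2) (A + I) D^(-1/2), returned as a dense array."""
    adj = adj + sparse.eye(adj.shape[0])          # add self-loops
    deg = np.asarray(adj.sum(axis=1)).flatten()   # node degrees (including the self-loop)
    d_inv_sqrt = np.power(deg, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    D_inv_sqrt = sparse.diags(d_inv_sqrt)
    return np.asarray((D_inv_sqrt @ adj @ D_inv_sqrt).todense())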
Esempio n. 34
0
def load_data(dataset_str):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well as the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)
    print(x.shape, y.shape, tx.shape, ty.shape, allx.shape, ally.shape)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    # print(len(labels))

    idx_test = test_idx_range.tolist()
    # print(idx_test)
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
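
A hypothetical call to this loader, showing the shapes it returns for the standard planetoid split of cora (2708 nodes, 1433 features, 140/500/1000 train/val/test):

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data('cora')
print(adj.shape, features.shape)   # (2708, 2708) (2708, 1433)
print(int(train_mask.sum()), int(val_mask.sum()), int(test_mask.sum()))   # 140 500 1000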
Esempio n. 35
0
def train(args):
    env = make_env(args.domain, args.instance)
    num_action_vars = env.num_action_vars

    # neural net parameters
    num_valid_actions = num_action_vars + 2
    state_dim = env.num_state_vars

    # nn hidden layer parameters
    num_gcn_features = args.num_features
    num_hidden_transition = int((2 * state_dim + num_action_vars) / 2)

    global_step = tf.Variable(0, name="global_step", trainable=False)

    instance_parser = InstanceParser(args.domain, args.instance)
    fluent_feature_dims = instance_parser.fluent_feature_dims
    nonfluent_feature_dims = instance_parser.nonfluent_feature_dims

    # Build network
    model = TransitionModel(num_inputs=state_dim,
                            num_outputs=num_valid_actions,
                            num_features=num_gcn_features,
                            num_hidden_transition=num_hidden_transition,
                            fluent_feature_dims=fluent_feature_dims,
                            nonfluent_feature_dims=nonfluent_feature_dims,
                            to_train="decoder",
                            activation=args.activation,
                            learning_rate=args.lr)

    # Loader
    current_sa_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='current_state_encoder')
    next_sa_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='next_state_encoder')
    transition_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='transition')

    loader = tf.train.Saver({
        # NOTE: this checkpoint key appears twice below, so the second entry wins
        # and only next_sa_vars[0] is actually restored from it.
        'global/policy_net/gconv1_vars/weights_0':
        current_sa_vars[0],
        'global/policy_net/gconv1_vars/weights_0':
        next_sa_vars[0],
        'global/policy_net/transition_hidden1/weights':
        transition_vars[0],
        'global/policy_net/transition_hidden1/biases':
        transition_vars[1],
        'global/policy_net/transition_hidden2/weights':
        transition_vars[2],
        'global/policy_net/transition_hidden2/biases':
        transition_vars[3],
    })

    restore_dir = args.restore_dir

    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.9

    adjacency_list = instance_parser.get_adjacency_list()
    adjacency_list = nx.adjacency_matrix(nx.from_dict_of_lists(adjacency_list))

    MODEL_DIR = os.path.join(
        args.model_dir, '{}-{}-{}'.format(args.domain, args.instance,
                                          args.num_features))

    summary_writer = tf.summary.FileWriter(os.path.join(MODEL_DIR, "train"))
    summaries_freq = 100

    CHECKPOINT_DIR = os.path.join(MODEL_DIR, "checkpoints")
    if not os.path.exists(CHECKPOINT_DIR):
        os.makedirs(CHECKPOINT_DIR)
    checkpoint_path = os.path.join(CHECKPOINT_DIR, 'model')

    saver = tf.train.Saver(max_to_keep=10)
    checkpoint_freq = 5000

    with tf.Session(config=config) as sess:
        load_model(sess, loader, restore_dir)

        # Training
        for counter in xrange(args.num_train_iter):
            # Generate state tuples
            state_tuples = generate_data_from_env(env, args.domain)

            # Compute transition probabilities
            states = []
            next_states = []
            action_probs = []
            for st in state_tuples:
                state = np.array(st[0])
                next_state = np.array(st[1])
                action_prob = instance_parser.get_action_probs(
                    state, next_state)

                states.append(state)
                next_states.append(next_state)
                action_probs.append(np.array(action_prob))

            batch_size = len(states)
            # adj_preprocessed = get_processed_adj(adjacency_list, batch_size)
            # current_input_features_preprocessed = get_processed_input(
            #     states, env.num_state_vars)
            # next_input_features_preprocessed = get_processed_input(
            #     next_states, env.num_state_vars)

            adj_preprocessed = get_processed_adj(adjacency_list, batch_size)
            current_input_features_preprocessed = get_processed_input(
                states, instance_parser)
            next_input_features_preprocessed = get_processed_input(
                next_states, instance_parser)

            # Backprop
            feed_dict = {
                model.current_state:
                states,
                model.current_inputs:
                current_input_features_preprocessed,
                model.next_inputs:
                next_input_features_preprocessed,
                model.placeholders_hidden1['support'][0]:
                adj_preprocessed,
                model.placeholders_hidden1['dropout']:
                0.0,
                model.placeholders_hidden1['num_features_nonzero']:
                current_input_features_preprocessed[1].shape,
                model.placeholders_hidden2['support'][0]:
                adj_preprocessed,
                model.placeholders_hidden2['dropout']:
                0.0,
                model.placeholders_hidden2['num_features_nonzero']:
                next_input_features_preprocessed[1].shape,
                model.action_probs:
                action_probs
            }
            step, loss, _, summaries = sess.run(
                [global_step, model.loss, model.train_op, model.summaries],
                feed_dict)

            # Write summaries
            if counter % summaries_freq == 0:
                summary_writer.add_summary(summaries, step)
                summary_writer.flush()

            # Store checkpoints
            if counter % checkpoint_freq == 0:
                saver.save(sess, checkpoint_path, step)
Esempio n. 36
0
def maze_graph(nodelist):
    flower_graph = {
        1: [2, 7],
        2: [1, 3],
        3: [2, 4, 9],
        4: [3, 5],
        5: [4, 11],
        6: [7, 13],
        7: [6, 1, 8],
        8: [7, 9, 15],
        9: [3, 8, 10],
        10: [9, 11, 17],
        11: [5, 10, 12],
        12: [11, 19],
        13: [6, 14],
        14: [13, 15, 20],
        15: [8, 14, 16],
        16: [15, 17, 22],
        17: [10, 16, 18],
        18: [17, 19, 24],
        19: [12, 18],
        20: [14, 21],
        21: [20, 22],
        22: [16, 21, 23],
        23: [22, 24],
        24: [18, 23]
    }

    island_prefixes = ['1', '2', '3', '4']

    bridge_edges = [('124', '201', 60), ('302', '121', 172),
                    ('223', '404', 169), ('324', '401', 60),
                    ('305', '220', 60)]

    bridge_edges_uw = [('124', '201'), ('121', '302'), ('223', '404'),
                       ('324', '401'), ('305', '220')]

    graph_prototype = {}

    for letter in island_prefixes:
        for node_suffix, edges in flower_graph.items():
            if node_suffix < 10:
                first_point = letter + '{}{}'.format(0, str(node_suffix))
            else:
                first_point = letter + '{}'.format(node_suffix)
            edge_list = []
            for n in edges:
                if n < 10:
                    second_point = letter + '{}{}'.format(0, str(n))
                else:
                    second_point = letter + '{}'.format(n)
                edge_list.append(second_point)
            graph_prototype[first_point] = edge_list

    mg = nx.Graph()
    xg = nx.Graph()
    mg = nx.from_dict_of_lists(graph_prototype)
    xg = nx.from_dict_of_lists(graph_prototype)
    for e in mg.edges():
        mg[e[0]][e[1]]['weight'] = 30

    mg.add_weighted_edges_from(bridge_edges)
    xg.add_edges_from(bridge_edges_uw)
    simple_path = dict(nx.all_pairs_shortest_path(xg))
    dijkstra_path = dict(nx.all_pairs_dijkstra_path(mg, weight='weight'))

    return mg, simple_path, dijkstra_path
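
A hypothetical use of the two lookup tables returned above (node ids are the island prefix plus the zero-padded flower-node number; nodelist is unused by maze_graph):

mg, simple_path, dijkstra_path = maze_graph(None)
print(simple_path['101']['424'])     # fewest-hop route from island 1 to island 4
print(dijkstra_path['101']['424'])   # cheapest route under the 30-per-edge plus bridge weights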
Esempio n. 37
0
def load_nell(dataset="nell.0.001",
              normalization="AugNormAdj",
              porting_to_torch=True,
              data_path=datadir,
              task_type="full"):

    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/nell_data/ind.{}.{}".format(dataset, names[i]),
                  'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "data/nell_data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'nell.0.001':
        # Find relation nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(allx.shape[0], len(graph))
        isolated_node_idx = np.setdiff1d(test_idx_range_full, test_idx_reorder)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - allx.shape[0], :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - allx.shape[0], :] = ty
        ty = ty_extended

        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]

        idx_all = np.setdiff1d(range(len(graph)), isolated_node_idx)
        if not os.path.isfile("data/{}.features.npz".format(dataset)):
            print(
                "Creating feature vectors for relations - this might take a while..."
            )
            features_extended = sp.hstack(
                (features,
                 sp.lil_matrix((features.shape[0], len(isolated_node_idx)))),
                dtype=np.int32).todense()

            features_extended[isolated_node_idx, features.shape[1]:] = np.eye(
                len(isolated_node_idx))
            features = sp.csr_matrix(features_extended)
            print("Done!")
            save_sparse_csr("data/{}.features".format(dataset), features)
        else:
            features = load_sparse_csr("data/{}.features.npz".format(dataset))

        adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    # degree = np.asarray(G.degree)
    degree = np.sum(adj, axis=1)

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    if task_type == "full":
        print("Load full supervised task.")
        #supervised setting
        idx_test = test_idx_range.tolist()
        idx_train = range(len(ally) - 500)
        idx_val = range(len(ally) - 500, len(ally))
    elif task_type == "semi":
        print("Load semi-supervised task.")
        #semi-supervised setting
        idx_test = test_idx_range.tolist()
        idx_train = range(len(y))
        idx_val = range(len(y), len(y) + 500)
    else:
        raise ValueError(
            "Task type: %s is not supported. Available options: full and semi." % task_type)

    features = features.astype(float)
    adj, features = preprocess_citation(adj, features, normalization)
    # features = np.array(features.todense())
    labels = np.argmax(labels, axis=1)
    # porting to pytorch
    if porting_to_torch:
        features = torch.FloatTensor(features).float()
        labels = torch.LongTensor(labels)
        # labels = torch.max(labels, dim=1)[1]
        adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        idx_train = torch.LongTensor(idx_train)
        idx_val = torch.LongTensor(idx_val)
        idx_test = torch.LongTensor(idx_test)
        degree = torch.LongTensor(degree)
    learning_type = "transductive"
    return adj, features, labels, idx_train, idx_val, idx_test, degree, learning_type
Esempio n. 38
0
    # get D1 Friends, the Rest will be iterable
    # Get the top five reciprocal friends and put them into the list of tuples
    list_of_tuples = []
    list_of_ids = []


    final_graph = {}
    client = pymongo.MongoClient()
    db = client.final
    for val in db.collection_names():
        final_graph[int(val)] = load_from_mongo('final', str(val))[0]['reciprocal_friends']
    final_graph[24551258] = intersection


    # Start creating the graph piece by piece.
    G=nx.from_dict_of_lists(final_graph)
    pos=nx.spring_layout(G) # positions for all nodes
    nx.draw_networkx_nodes(G,pos,node_size=3)
    nx.draw_networkx_edges(G,pos,width=1)
    nx.draw_networkx_labels(G,pos,font_size=2,font_family='sans-serif')
    plt.show()

    
    print("the number of nodes of the network is " + str(G.size()))
    print("The diameter of the network is: " + str(nx.diameter(G)))
    print("The average distance of the network is " + str(nx.center(G)))




Esempio n. 39
0
from pprint import pprint
import networkx as nx
import matplotlib.pyplot as plt

a, b, c, d, e, f, g, h = range(8)

N = {
     a: [b, c, d, e, f],
     b: [c, e],
     c: [d],
     d: [e],
     e: [f],
     f: [c, g, h],
     g: [f, h],
     h: [f, g]
    }

G = nx.from_dict_of_lists(N)
nx.draw(G)
plt.show()
print(type(N))
pprint(N)

Esempio n. 40
0
def load_data(dataset_str, is_sparse):
    if dataset_str == "ppi":
        return load_graphsage_data('data/ppi/ppi', is_sparse)
    """Load data."""
    if dataset_str != 'nell':
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(names)):
            with open("data/ind.{}.{}".format(dataset_str, names[i]),
                      'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pkl.load(f, encoding='latin1'))
                else:
                    objects.append(pkl.load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)

        test_idx_reorder = parse_index_file(
            "data/ind.{}.test.index".format(dataset_str))
        test_idx_range = np.sort(test_idx_reorder)

        if dataset_str == 'citeseer':
            # Fix citeseer dataset (there are some isolated nodes in the graph)
            # Find isolated nodes, add them as zero-vecs into the right position
            test_idx_range_full = range(min(test_idx_reorder),
                                        max(test_idx_reorder) + 1)
            tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
            tx_extended[test_idx_range - min(test_idx_range), :] = tx
            tx = tx_extended
            ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
            ty_extended[test_idx_range - min(test_idx_range), :] = ty
            ty = ty_extended

        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]
        features = preprocess_features(features, is_sparse)
        adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
        support = preprocess_adj(adj)

        labels = np.vstack((ally, ty))
        labels[test_idx_reorder, :] = labels[test_idx_range, :]

        idx_test = test_idx_range.tolist()
        idx_train = range(len(y))
        idx_val = range(len(y), len(y) + 500)

        train_mask = sample_mask(idx_train, labels.shape[0])
        val_mask = sample_mask(idx_val, labels.shape[0])
        test_mask = sample_mask(idx_test, labels.shape[0])

        # y_train = np.zeros(labels.shape)
        # y_val = np.zeros(labels.shape)
        # y_test = np.zeros(labels.shape)
        # y_train = labels[train_mask, :]
        # y_val[val_mask, :] = labels[val_mask, :]
        # y_test[test_mask, :] = labels[test_mask, :]
    else:
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(names)):
            with open("data/savedData/{}.{}".format(dataset_str, names[i]),
                      'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pkl.load(f, encoding='latin1'))
                else:
                    objects.append(pkl.load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)

        test_idx_reorder = parse_index_file(
            "data/savedData/{}.test.index".format(dataset_str))
        features = allx.tolil()
        adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
        labels = ally
        features = preprocess_features(features, is_sparse)
        support = preprocess_adj(adj)
        idx_test = test_idx_reorder
        idx_train = range(len(y))
        idx_val = range(len(y), len(y) + 969)
        train_mask = sample_mask(idx_train, labels.shape[0])
        val_mask = sample_mask(idx_val, labels.shape[0])
        test_mask = sample_mask(idx_test, labels.shape[0])

    # if not os.path.isfile("data/{}.nbrs.npz".format(dataset_str)):
    #     N = adj.shape[0]
    #     pool = multiprocessing.Pool(processes=56)
    #
    #     lis = []
    #     for i in range(32):
    #         li = range(int(N/32)*i, max(int(N/32)*(i+1), N))
    #         lis.append(li)
    #     adjs = [adj] * 32
    #     results = pool.map(starfind_4o_nbrs, zip(adjs, lis))
    #
    #     pool.close()
    #     pool.join()
    #     nbrs = results[0]
    #     # cnt = 0
    #     # for i in range(32):
    #     #
    #     #     cnt += len(results[i])
    #     #     print(cnt)
    #     #     nbrs += results[i]
    #
    #     np.savez("data/{}.nbrs.npz".format(dataset_str), data = nbrs)
    # else:
    #     loader = np.load("data/{}.nbrs.npz".format(dataset_str))
    #     nbrs = loader['data']
    print(adj.shape)
    return None, support, support, features, labels, train_mask, val_mask, test_mask
Esempio n. 41
0
def load_data(dataset_str):
    """Load data."""
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    print("graph is....")
    print(type(graph))
    print("allx is....")
    print(type(allx))  #
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    if dataset_str == 'nell.0.001':
        # Find relation nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(allx.shape[0], len(graph))
        isolated_node_idx = np.setdiff1d(test_idx_range_full, test_idx_reorder)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - allx.shape[0], :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - allx.shape[0], :] = ty
        ty = ty_extended

        features = sp.vstack((allx, tx)).tolil().toarray()
        print(features)
        features[test_idx_reorder, :] = features[test_idx_range, :]

        idx_all = np.setdiff1d(range(len(graph)), isolated_node_idx)

        if not os.path.isfile(
                "data/planetoid/{}.features.npz".format(dataset_str)):
            print(
                "Creating feature vectors for relations - this might take a while..."
            )
            features_extended = sp.hstack(
                (features,
                 sp.lil_matrix((features.shape[0], len(isolated_node_idx)))),
                dtype=np.int32).todense()
            features_extended[isolated_node_idx, features.shape[1]:] = np.eye(
                len(isolated_node_idx))
            features = sp.csr_matrix(features_extended)
            print("Done!")
            save_sparse_csr("data/planetoid/{}.features".format(dataset_str),
                            features)
        else:
            features = load_sparse_csr(
                "data/planetoid/{}.features.npz".format(dataset_str))

        adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]
    np.savetxt('labels', labels)

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
for i, city in enumerate(msa):
    print "Compute the number of neighbourhoods for %s (%s/%s)"%(msa[city],
                                                i+1,
                                                len(msa))

    ## Import adjacency matrix
    adjacency = {}
    with open('extr/adjacency_bg/msa/%s.csv'%city, 'r') as source:
        reader = csv.reader(source, delimiter='\t')
        reader.next()
        for rows in reader:
            adjacency[rows[0]] = rows[1:]


    ## Transform into graph
    G = nx.from_dict_of_lists(adjacency)


    ## Import list of bg where each class is overrepresented
    over_bg = {cl:[] for cl in classes}
    with open('extr/neighbourhoods/classes/msa/%s.csv'%city, 'r') as source:
        reader = csv.reader(source, delimiter='\t')
        for rows in reader:
            over_bg[rows[0]].append(rows[1])


    ## Extract neighbourhoods (the connected components of the subgraph
    ## constituted of the areal units where the class is overrepresented)
    neighbourhoods = {cl: nx.connected_component_subgraphs(G.subgraph(over_bg[cl]))
                        for cl in classes}
    neigh_num[city] = {cl: len(list(neighbourhoods[cl])) for cl in classes}
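
nx.connected_component_subgraphs was removed in networkx 2.4; on newer versions the last two lines of the loop above could be written as:

    neighbourhoods = {cl: [G.subgraph(c).copy()
                           for c in nx.connected_components(G.subgraph(over_bg[cl]))]
                      for cl in classes}
    neigh_num[city] = {cl: len(neighbourhoods[cl]) for cl in classes}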
Esempio n. 43
0
def graph_to_json():

    json_file = {}

    position_file = 'https://s3-us-west-2.amazonaws.com/pollstr/visuals/dataBrexit.txt'
    open_s3 = urllib.URLopener()
    position = eval(open_s3.open(position_file).read())

    neighborhood_file = 'https://s3-us-west-2.amazonaws.com/pollstr/visuals/net4.txt'
    open_s3 = urllib.URLopener()
    neighborhood_dict = eval(open_s3.open(neighborhood_file).read())

    Graph = nx.from_dict_of_lists(neighborhood_dict)

    nodes = Graph.nodes()
    list_of_nodes = []

    id_of_nodes = {}
    i = 0
    for node in nodes:
        id_of_nodes[node] = i
        i += 1

    node_info_dict = {}

    for node in nodes:
        node_info = {}
        node_info['name'] = str(node)
        try:
            if position[node]['position'] == 'leave':
                node_info['color'] = 'blue'
                node_info['followers'] = position[node]['followers']
                node_info['logFollowers'] = position[node]['log']

            elif position[node]['position'] == 'remain':
                node_info['color'] = 'yellow'
                node_info['followers'] = position[node]['followers']
                node_info['logFollowers'] = position[node]['log']
            else:
                node_info['color'] = '#e7e7e7'
                node_info['followers'] = position[node]['followers']
                node_info['logFollowers'] = position[node]['log']
        except:
            node_info['color'] = '#e7e7e7'
            node_info['followers'] = 'DK'
            node_info['logFollowers'] = 3
        node_info_dict[str(node)] = node_info

        list_of_nodes.append(node_info)

    edges = Graph.edges()
    list_of_edges = []
    for node in nodes:
        neighbors = Graph.neighbors(node)
        for neighbor in neighbors:

            edge_info = {}
            edge_info['source'] = id_of_nodes[node]
            edge_info['target'] = id_of_nodes[neighbor]
            edge_info['value'] = 1
            try:
                edge_info['color'] = node_info_dict[node]['color']
            except:
                edge_info['color'] = '#e7e7e7'
            list_of_edges.append(edge_info)

    json_file['nodes'] = list_of_nodes
    json_file['links'] = list_of_edges

    json_file = json.dumps(json_file)

    return json_file
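
urllib.URLopener only exists on Python 2, and eval on downloaded text is fragile. A Python 3 sketch of the two fetches above, using ast.literal_eval as a safer substitute (this assumes the remote files really are Python literals, as the original eval implies):

import ast
import urllib.request

def fetch_dict(url):
    # Download the text and parse it as a Python literal instead of eval-ing it.
    with urllib.request.urlopen(url) as resp:
        return ast.literal_eval(resp.read().decode('utf-8'))

position = fetch_dict('https://s3-us-west-2.amazonaws.com/pollstr/visuals/dataBrexit.txt')
neighborhood_dict = fetch_dict('https://s3-us-west-2.amazonaws.com/pollstr/visuals/net4.txt')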
Esempio n. 44
0
import json
import heapq as heap
from operator import itemgetter
import numpy as np
import sim
import betweenness_centrality


# Load data from file given by command line argument
filename = sys.argv[1]
N = int(filename.split('.')[-3])
f = open(filename)
graph_data = json.load(f)
f.close()

G = nx.from_dict_of_lists(graph_data)

def save_graph(graph, save_name):
    '''
    Saves networkx graph "graph" as pdf named "save_name"
    Source: http://stackoverflow.com/a/17388676
    '''

    #initialze Figure
    plt.figure(num=None, figsize=(20, 20), dpi=80)
    plt.axis('off')
    fig = plt.figure(1)
    pos = nx.spring_layout(graph)
    nx.draw_networkx_nodes(graph,pos)
    nx.draw_networkx_edges(graph,pos)
    nx.draw_networkx_labels(graph,pos)
Esempio n. 45
0
def construct_tree_from_graph(adjacency_list, density, prune_threshold=None,
                              num_levels=None, verbose=False):
    """
    Construct a level set tree from a similarity graph and a density estimate.

    Parameters
    ----------
    adjacency_list : list [list]
        Adjacency list of the k-nearest neighbors graph on the data. Each entry
        contains the indices of the `k` closest neighbors to the data point at
        the same row index.

    density : list [float]
        Estimate of the density function, evaluated at the data points
        represented by the keys in `adjacency_list`.

    prune_threshold : int, optional
        Leaf nodes with fewer than this number of members are recursively
        merged into larger nodes. If 'None' (the default), then no pruning
        is performed.

    num_levels : list int, optional
        Number of density levels in the constructed tree. If None (default),
        `num_levels` is internally set to be the number of rows in `X`.

    verbose : bool, optional
        If True, a progress indicator is printed at every 100th level of tree
        construction.

    Returns
    -------
    T : levelSetTree
        See the LevelSetTree class for attributes and method definitions.

    See Also
    --------
    construct_tree, LevelSetTree

    Examples
    --------
    >>> X = numpy.random.rand(100, 2)
    >>> knn_graph, radii = debacl.utils.knn_graph(X, k=8)
    >>> density = debacl.utils.knn_density(radii, n=100, p=2, k=8)
    >>> tree = debacl.construct_tree_from_graph(knn_graph, density,
    ...                                         prune_threshold=5)
    >>> print tree
    +----+-------------+-----------+------------+----------+------+--------+----------+
    | id | start_level | end_level | start_mass | end_mass | size | parent | children |
    +----+-------------+-----------+------------+----------+------+--------+----------+
    | 0  |    0.000    |   0.768   |   0.000    |  0.390   | 100  |  None  |  [1, 2]  |
    | 1  |    0.768    |   1.494   |   0.390    |  0.790   |  30  |   0    |  [7, 8]  |
    | 2  |    0.768    |   4.812   |   0.390    |  1.000   |  31  |   0    |    []    |
    | 7  |    1.494    |   2.375   |   0.790    |  0.950   |  6   |   1    |    []    |
    | 8  |    1.494    |   2.308   |   0.790    |  0.940   |  5   |   1    |    []    |
    +----+-------------+-----------+------------+----------+------+--------+----------+
    """

    ## Initialize the graph and cluster tree
    levels = _utl.define_density_mass_grid(density, num_levels=num_levels)

    G = _nx.from_dict_of_lists(
        {i: neighbors for i, neighbors in enumerate(adjacency_list)})

    T = LevelSetTree(density, levels)

    ## Figure out roots of the tree
    cc0 = _nx.connected_components(G)

    for i, c in enumerate(cc0):  # c is only the vertex list, not the subgraph
        T._subgraphs[i] = G.subgraph(c)
        T.nodes[i] = ConnectedComponent(
            i, parent=None, children=[], start_level=0., end_level=None,
            start_mass=0., end_mass=None, members=c)

    # Loop through the removal grid
    previous_level = 0.
    n = float(len(adjacency_list))

    for i, level in enumerate(levels):
        if verbose and i % 100 == 0:
            _logging.info("iteration {}".format(i))

        ## figure out which points to remove, i.e. the background set.
        bg = _np.where((density > previous_level) & (density <= level))[0]
        previous_level = level

        ## compute the mass after the current bg set is removed
        old_vcount = sum([x.number_of_nodes()
                          for x in T._subgraphs.itervalues()])
        current_mass = 1. - ((old_vcount - len(bg)) / n)

        # loop through active components, i.e. subgraphs
        deactivate_keys = []     # subgraphs to deactivate at the iter end
        activate_subgraphs = {}  # new subgraphs to add at the end of the iter

        for (k, H) in T._subgraphs.iteritems():

            ## remove nodes at the current level
            H.remove_nodes_from(bg)

            ## check if subgraph has vanished
            if H.number_of_nodes() == 0:
                T.nodes[k].end_level = level
                T.nodes[k].end_mass = current_mass
                deactivate_keys.append(k)

            else:  # subgraph hasn't vanished

                ## check if subgraph now has multiple connected components
                # NOTE: this is *the* bottleneck
                if not _nx.is_connected(H):

                    ## deactivate the parent subgraph
                    T.nodes[k].end_level = level
                    T.nodes[k].end_mass = current_mass
                    deactivate_keys.append(k)

                    ## start a new subgraph & node for each child component
                    cc = _nx.connected_components(H)

                    for c in cc:
                        new_key = max(T.nodes.keys()) + 1
                        T.nodes[k].children.append(new_key)
                        activate_subgraphs[new_key] = H.subgraph(c)

                        T.nodes[new_key] = ConnectedComponent(
                            new_key, parent=k, children=[], start_level=level,
                            end_level=None, start_mass=current_mass,
                            end_mass=None, members=c)

        # update active components
        for k in deactivate_keys:
            del T._subgraphs[k]

        T._subgraphs.update(activate_subgraphs)

    ## Prune the tree
    if prune_threshold is not None:
        T = T.prune(threshold=prune_threshold)

    return T
Esempio n. 46
0
import simplejson as json
import networkx as nx
from networkx.readwrite import json_graph
import matplotlib.pyplot as plt
from numpy import cumsum

print 'Running Graph Properties script'

with open('net_sci_coauthorships.txt', 'r') as f:
    js_graph = json.load(f) # Dictionary of key-value pairs
G = nx.from_dict_of_lists(js_graph)

#### Plot histogram #### 
# Get degrees of all nodes, create sorted list
degrees = nx.degree(G).values()
plt.hist(degrees, bins=10, log=True)
plt.title('Degree Histogram')
plt.ylabel('Number of Nodes')
plt.xlabel('Degree')
# plt.show()
plt.savefig('degree_histogram.png')
plt.clf()

#### Plot cumulative distribution function #### 
cumsums = cumsum(degrees)
plt.plot(cumsums)
plt.title('Cumulative Node Degrees')
plt.ylabel('Cumulative Node Degree')
plt.xlabel('Number of Nodes')
# plt.show()
plt.savefig('degree_cumsum.png')
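
One caveat for newer environments: nx.degree(G) stopped returning a dict in networkx 2.0, so the .values() call above fails there; the histogram input can be built instead as:

degrees = [d for _, d in G.degree()]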
Esempio n. 47
0
letters = {'a':['c','e','g'],
        'b':['a','d','c','e','f','g',],
        'c':['a','d','g','h'],
        'd':['d','c','f','g'],
        'e':['d','g'],
        'f':['j'],
        'g':['h'],
        'h':['a','e','g'],
        'i':[],
        'j':['a','g'],
        'k':['e','f','g']
        }

# the digraph datastructure from LangDict object
g = nx.DiGraph()
nx.from_dict_of_lists(LangDict(letters), create_using=g)
g = g.reverse()

#before graph
nx.draw_networkx(g, arrows=True)
plt.draw()
plt.show()

scc = nx.strongly_connected_components(g)
print('yup')

G = nx.condensation(g, scc=scc)
print('done')


# Display the info
Esempio n. 48
0
        sys.exit(1)

    path = sys.argv[1]

    # Define global var
    num_rounds = 50

    # Parse the input path to find filename and number of players and seeds
    (directory, filename, num_players, num_seeds) = parse_file_path(path)

    # Find output file name
    output_filename = directory + filename.rsplit('.', 1)[0] + ".txt"

    # Get the adjacency list
    graph = load_graph(path)

    # Generate graph from nodes
    G = nx.from_dict_of_lists(graph)

    # Generate a list of random nodes as root nodes
    # strategy = random_nodes_strategy(graph, num_seeds, num_rounds)
    # strategy = highest_degree_strategy(graph, num_seeds, num_rounds)
    # strategy = eigenvector_strategy(G, num_seeds, num_rounds)
    # strategy = dominating_set_strategy(G, num_seeds, num_rounds)
    # strategy = load_centraility(G, num_seeds, num_rounds)
    # strategy = mixed_strategy(G, num_seeds, num_rounds)
    strategy = closeness_strategy(G, num_seeds, num_rounds)

    # Save input file
    save_output(output_filename, strategy)
def matching(items):
    """I'm not sure what this does but I bet it's useful"""
    G = nx.from_dict_of_lists(items)
    return tuple((k, v) for k, v in nx.bipartite.maximum_matching(G, top_nodes=items).items() if k in items)
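
A hypothetical call, where the dict keys are one side of the bipartite graph and the values list the nodes they may be matched to:

print(matching({'a': [1, 2], 'b': [2]}))   # e.g. (('a', 1), ('b', 2))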
Esempio n. 50
0
# In[123]:

word_list = ['halloween', 'love', 'follow', 'happi', 'night', 'bihday', 'dress']
word_ass_dict = {}
for x in word_list:
    word_ass_dict[x] = count_ass(x)


# In[124]:

word_ass_dict


# In[125]:

Gword=nx.from_dict_of_lists(word_ass_dict)


# In[156]:

pos = nx.shell_layout(Gword)
nx.draw_networkx(Gword, pos, node_size = 1500, node_color = 'w', font_color = 'b', font_size = '12')
plt.axis('off')
plt.title('Graph of associations between most frequent words', fontsize='20')
plt.show()


# In[ ]:


def load_data(dataset_str):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances (nodes), scipy.sparse.csr.csr_matrix
    ind.dataset_str.tx => the feature vectors of the test instances, scipy.sparse.csr.csr_matrix
    ind.dataset_str.allx => labeled and unlabeled training instances
        (a superset of ind.dataset_str.x), scipy.sparse.csr.csr_matrix
    ind.dataset_str.y => the one-hot labels of the labeled training instances, numpy.ndarray
    ind.dataset_str.ty => the one-hot labels of the test instances, numpy.ndarray
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => the network, {index: [index_of_neighbor_nodes]} as collections.defaultdict object;
    ind.dataset_str.test.index => the indices of the test set in graph, for the inductive setting as list object.

    All of the above must be saved with the python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well as the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    #[allx shape: (1708, 1433); ally shape: (1708, 7)]
    objects = []

    # Read in each data file
    for name in names:
        with open("data/ind.{}.{}".format(dataset_str, name), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, y, tx, ty, allx, ally, graph = tuple(objects)  # pylint: disable=unbalanced-tuple-unpacking
    #print(ally.shape)
    #p = np.sum(ally,axis=1)
    # Load the test set indices
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)  # sort the indices

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack(
        (allx, tx)).tolil()  # switch to the lil sparse format (good for incremental assignment and fast row access)
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))  # adjacency matrix
    #print(adj.shape)

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[
        test_idx_range, :]  # [replace the rows of labels at test_idx_reorder with the rows at test_idx_range, e.g. row 1708 of labels moves to row 2692]

    idx_test = test_idx_range.tolist()  # [1708..2707]
    idx_train = range(len(y))  # [0..139]
    idx_val = range(len(y), len(y) + 500)  # [140..639]

    train_mask = sample_mask(idx_train, labels.shape[0])  # [rows 0..139 set to 1]
    val_mask = sample_mask(idx_val, labels.shape[0])  # [rows 140..639 set to 1]
    test_mask = sample_mask(idx_test, labels.shape[0])  # [rows 1708..2707 set to 1]

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]  # [rows 0..139 take the values of rows 0..139 of labels]
    y_val[val_mask, :] = labels[val_mask, :]  # [rows 140..639 take the values of rows 140..639 of labels]
    y_test[test_mask, :] = labels[
        test_mask, :]  # [rows 1708..2707 take the values of rows 1708..2707 of labels]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
Esempio n. 52
0
    for i in targetlist:
        global memberlist
        memberlist = []
        memberlist=data['Source'][data.Target==i].tolist()
        print memberlist
        cxlist = (list(itertools.permutations(memberlist,2)))
        for n in cxlist:
            mytuple = tuple([n,i])
            connectionlist += [mytuple]
    #print mytuple
    resultdict=collections.defaultdict(list)

#for x in connectionlist:
# resultdict[x[0]].append(x[1])
#

for x in connectionlist:
    resultdict[x[0]].append(x[1])
#resultdict
G=networkx.from_dict_of_lists(resultdict)

mymatrix = networkx.to_numpy_matrix(G)

mymatrix.shape

mymatrix[0,:]
#G.nodes()
label = [memberlist,memberlist]
mylarry = la.larry(mymatrix,label, dtype=float)

Esempio n. 53
0
def load_data(dataset_str):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index (the ids of the test instances) => the indices of test instances in graph, for the inductive setting as list object.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well as the training/test data).

    LIL (row-based linked-list sparse format): the matrix is stored as two lists; data[k] is the list of non-zero values in row k, and rows[k] is the list of the column indices of those non-zero values
    adj (adjacency matrix): LIL format
    features (feature matrix): LIL format
    labels: built by stacking ally and ty
    train_mask, val_mask, test_mask: vectors of shape (2708,); train_mask is True on [0, 140) and False elsewhere; val_mask
    is True on [140, 640) and False elsewhere; test_mask is True on [1708, 2707] and False elsewhere

    y_train, y_val, y_test: all of shape (2708, 7). y_train equals the rows of labels where train_mask is True and is zero elsewhere; y_val equals
    the rows of labels where val_mask is True and is zero elsewhere; y_test equals the rows of labels where test_mask is True and is zero elsewhere
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
Esempio n. 54
0
def isolate_all(xyt_filename, BINS=6, force=False, sparse=False):

    filaments_filename = filament_filename_from_xyt_filename(xyt_filename)  # Assertions inside function
    if not SILENT:
        print "Isolating filaments from:", xyt_filename

    if not force and os.path.isfile(filaments_filename):
        if SILENT:
            return filaments_filename
        else:
            print "Filaments already saved as", filaments_filename
            if "y" not in raw_input("Run isolate_all() anyway? ([no]/yes):  "):
                print "Aborted: isolate_all()"
                return filaments_filename

    hdu_list = config.default_open(xyt_filename)
    ntheta = hdu_list[0].header["NTHETA"]
    wlen = hdu_list[0].header["WLEN"]
    frac = hdu_list[0].header["FRAC"]
    naxis1 = hdu_list[0].header["NAXIS1"]
    naxis2 = hdu_list[0].header["NAXIS2"]
    original = hdu_list[0].header["ORIGINAL"]
    Hi = hdu_list[1].data["hi"]
    Hj = hdu_list[1].data["hj"]

    # Compute ThetaRHT for all pixels given, then bin by theta
    B = list(map(rht.theta_rht, hdu_list[1].data["hthets"]))  # List of theta_rht values
    C = np.multiply(np.asarray(B), BINS / np.pi).astype(np.int_)
    del B

    # Ready the output HDUList and close the input HDUList
    output_hdulist = fits.HDUList(hdu_list[0].copy())  # , open(filaments_filename, 'w')) #Overwrites
    hdu_list.close()

    # Set Assignment
    # unprocessed = list()
    list_of_HDUs = list()
    search_pattern = [
        (-1, -1),
        (-1, 0),
        (-1, 1),
        (0, -1),
    ]  # [(-1, 1), (-1,-1), (-1, 0), (0, -1), (-2, -2), (-2, -1), (-2, 0), (-2, 1), (-2, 2), (-1, -2), (-1, 2), (0,-2)]
    for bin in range(BINS):
        delimiter = np.nonzero(C == bin)[0]
        raw_points = list(zip(Hi[delimiter], Hj[delimiter]))
        del delimiter
        problem_size = len(raw_points)
        # message='Step '+str(bin+1)+'/'+str(BINS)+': (N='+str(problem_size)+')'
        # progress_bar = Progress(problem_size, message=message, incrementing=True)

        point_dict = dict([x[::-1] for x in enumerate(raw_points)])
        set_dict = collections.defaultdict(list)
        # theta_dict = dict()

        for coord in raw_points:
            # rht.update_progress(0.3*(i/problem_size), message=message)
            # progress_bar.update()
            # theta_dict[coord] = B[point_dict[coord]]
            for rel_coord in search_pattern:
                try:
                    j = point_dict[config.rel_add(coord, rel_coord)]
                    set_dict[point_dict[coord]].append(j)
                except Exception:
                    continue

        G = nx.from_dict_of_lists(set_dict)  # Undirected graph made using set_dict as an adjacency list
        del set_dict

        # progress_bar = Progress(problem_size, message=message, incrementing=False)
        sources = list(range(problem_size))
        flags = np.ones((problem_size), dtype=np.int_)
        while len(sources) > 0:
            source = sources.pop()
            if not flags[source]:
                continue
            else:
                # rht.update_progress(0.3+0.3*(1.0-len(sources)/problem_size), message=message)
                # progress_bar.update(len(sources))
                try:
                    for member in nx.descendants(G, source):
                        flags[member] = False
                        point_dict[raw_points[member]] = source
                        G.remove_node(member)  # TODO Remove members from G if that would speed up subsequent calls?
                except nx.NetworkXError:
                    # Assume we hit an isolated pixel (never made it into G) and move on
                    pass
        del sources, flags, G

        histogram = np.bincount(list(map(point_dict.get, raw_points)))
        mask = np.nonzero(histogram >= int(frac * wlen))[0]
        del histogram

        # progress_bar = Progress(problem_size, message=message, incrementing=False)
        mask_dict = dict([x[::-1] for x in enumerate(mask)])
        out_clouds = collections.defaultdict(list)

        while len(point_dict) > 0:
            temp = point_dict.popitem()
            try:
                # Keying into mask_dict is the only operation that ought to throw an exception
                out_clouds[mask_dict[temp[1]]].append(temp[0])
                # progress_bar.update(len(point_dict))
                # rht.update_progress(0.6+0.399*(1.0-len(point_dict)/problem_size), message=message)
            except Exception:
                continue

        while len(out_clouds) > 0:
            cloud = out_clouds.popitem()[1]
            # unprocessed.append(cloud)
            list_of_HDUs.append(config.Cloud(cloud).as_HDU(sparse=sparse))  # TODO Incorporate theta_dict

        # rht.update_progress(1.0, final_message='Finished joining '+str(problem_size)+' points! Time Elapsed:')

    # Convert lists of two-integer tuples into ImageHDUs
    # unprocessed.sort(key=len, reverse=True)
    # output_hdulist = fits.HDUList(map(config.Cloud.as_ImageHDU, map(config.Cloud, unprocessed)))
    # del unprocessed

    list_of_HDUs.sort(key=lambda h: h.header["DIAG"], reverse=False)
    while len(list_of_HDUs) > 0:
        output_hdulist.append(list_of_HDUs.pop())

    # Output HDUList to File
    output_hdulist.writeto(filaments_filename, output_verify="silentfix", clobber=True, checksum=True)
    try:
        output_hdulist.flush()
    except Exception:
        pass
    try:
        output_hdulist.close()
    except Exception:
        pass

    if not SILENT:
        print "Results successfully output to " + filaments_filename
    return filaments_filename
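
The set-assignment loop above builds an undirected graph from a dict-of-lists adjacency and then collapses each connected group of points onto one representative. A hedged sketch of the same grouping idea using nx.connected_components on toy coordinates follows; the names and data below are illustrative only, not part of the original code.

import collections
import networkx as nx

points = [(0, 0), (0, 1), (1, 1), (5, 5), (5, 6)]        # toy pixel coordinates
adjacency = {0: [1], 1: [0, 2], 2: [1], 3: [4], 4: [3]}  # indices into points

G = nx.from_dict_of_lists(adjacency)                     # undirected graph, as in isolate_all
clouds = collections.defaultdict(list)
for label, component in enumerate(nx.connected_components(G)):
    for idx in component:
        clouds[label].append(points[idx])

# e.g. clouds == {0: [(0, 0), (0, 1), (1, 1)], 1: [(5, 5), (5, 6)]} (component order may vary)
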
Esempio n. 55
0
def load_citation(dataset_str="cora",
                  normalization="AugNormAdj",
                  porting_to_torch=True,
                  data_path=datadir,
                  task_type="full"):
    """
    Load Citation Networks Datasets.
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open(
                os.path.join(data_path,
                             "ind.{}.{}".format(dataset_str.lower(),
                                                names[i])), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        os.path.join(data_path, "ind.{}.test.index".format(dataset_str)))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    G = nx.from_dict_of_lists(graph)
    adj = nx.adjacency_matrix(G)
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    # degree = np.asarray(G.degree)
    degree = np.sum(adj, axis=1)

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    if task_type == "full":
        print("Load full supervised task.")
        #supervised setting
        idx_test = test_idx_range.tolist()
        idx_train = range(len(ally) - 500)
        idx_val = range(len(ally) - 500, len(ally))
    elif task_type == "semi":
        print("Load semi-supervised task.")
        #semi-supervised setting
        idx_test = test_idx_range.tolist()
        idx_train = range(len(y))
        idx_val = range(len(y), len(y) + 500)
    else:
        raise ValueError(
            "Task type: %s is not supported. Available option: full and semi.")

    adj, features = preprocess_citation(adj, features, normalization)
    features = np.array(features.todense())
    labels = np.argmax(labels, axis=1)
    # porting to pytorch
    if porting_to_torch:
        features = torch.FloatTensor(features).float()
        labels = torch.LongTensor(labels)
        # labels = torch.max(labels, dim=1)[1]
        #adj = sparse_mx_to_torch_sparse_tensor(adj).float()
        adj = torch.FloatTensor(np.array(adj.todense()))
        idx_train = torch.LongTensor(idx_train)
        idx_val = torch.LongTensor(idx_val)
        idx_test = torch.LongTensor(idx_test)
        degree = torch.LongTensor(degree)
    learning_type = "transductive"
    return adj, features, labels, idx_train, idx_val, idx_test, degree, learning_type
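
The line adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) in load_citation symmetrizes the adjacency matrix by taking the elementwise maximum of adj and its transpose, so edges stored in only one direction become undirected without doubling edges already stored in both. A small illustrative check on a toy sparse matrix, not part of the original code:

import numpy as np
import scipy.sparse as sp

# Directed toy adjacency: edge 0->1 with weight 1 and edge 1->2 with weight 2.
adj = sp.csr_matrix(np.array([[0, 1, 0],
                              [0, 0, 2],
                              [0, 0, 0]], dtype=float))

# Elementwise maximum of adj and adj.T: each undirected edge appears exactly once per direction.
sym = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
print(sym.toarray())
# [[0. 1. 0.]
#  [1. 0. 2.]
#  [0. 2. 0.]]
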