def prepare_data_for_stellargraph(self):
    """Load the pickled inputs and wrap them in a ``StellarGraph``.

    Reads ``adj.pkl``, ``features.pkl`` and ``train.pkl`` from
    ``self.data_path``. Every non-zero entry ``adj[i, j]`` becomes a weighted
    edge ``i -> j``; the feature table becomes the node data with columns
    ``w0, w1, ...``.

    Returns:
        tuple: ``(graph, labels)`` where ``graph`` is a ``StellarGraph`` with
        node type ``"paper"`` and edge type ``"cites"``, and ``labels`` is the
        ``"label"`` column built from the contents of ``train.pkl``.
    """
    def load_raw_input():
        # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code. Only point data_path at trusted
        # files.
        adj, features, labels = (
            np.load(os.path.join(self.data_path, name), allow_pickle=True)
            for name in ('adj.pkl', 'features.pkl', 'train.pkl')
        )
        return adj, features, labels

    print("Reading raw inputs...")
    adj, features, labels = load_raw_input()

    print("creating edges...")
    edge_records = [(i, j, adj[i, j]) for i, j in zip(*adj.nonzero())]
    # Naming the columns at construction keeps an adjacency without any
    # non-zero entry working: assigning three names to the zero-column frame
    # built from an empty list raises a length-mismatch ValueError.
    edge_df = pd.DataFrame(edge_records, columns=["source", "target", "weight"])

    print("creating nodes...")
    feature_df = pd.DataFrame(features)
    feature_df.columns = [f"w{i}" for i in range(feature_df.shape[1])]

    print("creating labels...")
    label_series = pd.DataFrame({"label": labels})["label"]

    my_graph = StellarGraph({"paper": feature_df}, {"cites": edge_df})
    print(my_graph.info())
    return my_graph, label_series
def _leading_floats(fields, count):
    """Return the first ``count`` entries of ``fields`` converted to float."""
    values = [float(field) for field in fields]
    if len(values) < count:
        # A short row is reported with IndexError, the exception the previous
        # element-by-element copy raised in this situation.
        raise IndexError(
            "expected {} feature values, found {}".format(count, len(values))
        )
    return values[:count]


def load_from_file(filePrefix):
    """Build a ``StellarGraph`` from ``<filePrefix>_nodes.txt`` / ``_edges.txt``.

    Both files are tab-separated and every line is expected to end with a
    trailing tab (the last field of each line is discarded). The first line
    of each file holds two integers: the row count and the number of features
    per row.

    * Node rows: the first field is skipped (presumably a node id -- confirm
      against the file producer), the remaining fields are float features.
    * Edge rows: source id, target id, then float features.

    Only the first node feature is exposed on the graph, as column ``"x"``.
    Edge features are parsed but not attached to the graph. When the node
    file declares zero features the nodes are created without feature columns.

    Args:
        filePrefix: Path prefix shared by the two input files.

    Returns:
        The constructed ``StellarGraph``.
    """
    nodes_filename = filePrefix + "_nodes.txt"
    edges_filename = filePrefix + "_edges.txt"
    node_features = None
    edge_features = None

    # https://stellargraph.readthedocs.io/en/stable/demos/basics/loading-numpy.html
    with open(nodes_filename) as f:
        # [:-1] drops the field that follows the trailing tab.
        num_nodes, num_node_features = map(int, f.readline().split('\t')[:-1])
        if num_node_features > 0:
            # Feature-major layout: one row per feature, one column per node.
            node_features = np.zeros((num_node_features, num_nodes))
            # Blank lines (e.g. at end of file) carry no record; skip them.
            records = (line for line in f if line.strip())
            for i, line in enumerate(records):
                node_features[:, i] = _leading_floats(
                    line.split('\t')[1:-1], num_node_features
                )

    # read edge features
    with open(edges_filename) as f:
        num_edges, num_edge_features = map(int, f.readline().split('\t')[:-1])
        senders = np.zeros(num_edges, dtype=int)
        receivers = np.zeros(num_edges, dtype=int)
        if num_edge_features > 0:
            edge_features = np.zeros((num_edge_features, num_edges))
        records = (line for line in f if line.strip())
        for i, line in enumerate(records):
            elements = line.split('\t')
            senders[i] = int(elements[0])
            receivers[i] = int(elements[1])
            if edge_features is not None:
                edge_features[:, i] = _leading_floats(
                    elements[2:-1], num_edge_features
                )

    square_numeric_edges = pd.DataFrame({"source": senders, "target": receivers})

    if node_features is not None:
        square_node_data = pd.DataFrame({"x": node_features[0].tolist()})
    else:
        # No features declared: previously this path failed on
        # ``node_features[0]`` with node_features still None. Keep the nodes,
        # identified by position, with no feature columns.
        square_node_data = pd.DataFrame(index=pd.RangeIndex(num_nodes))

    square_numeric = StellarGraph(square_node_data, edges=square_numeric_edges)
    print("GRAPH INFO")
    print("....................................")
    print(square_numeric.info())
    print("....................................")

    return square_numeric
import utils_ctdne as utils
from stellargraph import StellarGraph
from stellargraph.data import EdgeSplitter
from sklearn.model_selection import train_test_split

# Dataset selection. To run on the unweighted Cora citation data instead, use
#   filename_edges = "Datasets/Cora/cora_cites.csv" with wt = False
filename_edges = "Datasets/soc-sign-bitcoinotc/soc-sign-bitcoinotc-temporal.csv"
wt = True

# Load the edge list and build the master graph G from it.
edgelist_df = utils.load_data(filename_edges, weighted=wt)
G = StellarGraph(edges=edgelist_df)
print("\n", G.info())
print("Created master graph from data")

# Splitter operating on the original graph.
edge_splitter_test = EdgeSplitter(G)

# Randomly sample a fraction p=0.1 of the positive links plus the same number
# of negative links. G_test is the reduced graph with the sampled links
# removed; it is used to compute node embeddings with more edges than G_train.
G_test, examples_test, labels_test = edge_splitter_test.train_test_split(
    p=0.1, method="global"
)