Esempio n. 1
0
    def prepare_data_for_stellargraph(self):
        """Build a StellarGraph plus a label series from the raw input files.

        Loads ``adj.pkl``, ``features.pkl`` and ``train.pkl`` from
        ``self.data_path``. Every non-zero entry ``adj[i, j]`` becomes a
        ``"cites"`` edge ``i -> j`` whose weight is that entry, and the
        feature data becomes the ``"paper"`` node table with columns
        ``w0``, ``w1``, ... (assumes ``features`` is 2-D with one row per
        node -- TODO confirm against the data files).

        Returns:
            tuple: ``(graph, labels)`` where ``graph`` is the ``StellarGraph``
            and ``labels`` is a pandas ``Series`` named ``"label"`` built
            from the contents of ``train.pkl``.
        """
        def load_pickle(filename):
            # SECURITY: allow_pickle=True lets np.load unpickle the file, and
            # unpickling can execute arbitrary code. Only use trusted files.
            return np.load(os.path.join(self.data_path, filename),
                           allow_pickle=True)

        print("Reading raw inputs...")
        adj = load_pickle('adj.pkl')
        features = load_pickle('features.pkl')
        labels = load_pickle('train.pkl')

        # The adjacency matrix describes the edges, so this step is reported
        # as edge creation.
        print("creating edges...")
        edge_rows = [[i, j, adj[i, j]] for i, j in zip(*adj.nonzero())]
        # Passing the column names to the constructor (instead of assigning
        # them afterwards) also works when there are no edges at all.
        edge_df = pd.DataFrame(edge_rows,
                               columns=["source", "target", "weight"])

        # The feature table is the node data.
        print("creating nodes...")
        feature_df = pd.DataFrame(features)
        feature_df.columns = [f"w{i}" for i in range(feature_df.shape[1])]

        print("creating labels...")
        label_series = pd.DataFrame({"label": labels})["label"]

        my_graph = StellarGraph({"paper": feature_df}, {"cites": edge_df})

        print(my_graph.info())
        return my_graph, label_series
def _read_counts(handle):
    """Parse the two integers on a tab-terminated header line."""
    first, second = map(int, handle.readline().split('\t')[:-1])
    return first, second


def _data_rows(handle):
    """Yield the tab-split fields of every remaining non-blank line."""
    for line in handle:
        if line.strip():
            yield line.split('\t')


def load_from_file(filePrefix):
    """Load a StellarGraph from ``<filePrefix>_nodes.txt`` / ``_edges.txt``.

    Both files are tab-separated, and the field after the last tab of every
    line (header included) is discarded, i.e. lines are expected to end with
    a trailing tab.

    * Nodes file: header ``num_nodes<TAB>num_node_features<TAB>``, followed
      by one line per node. The first field of a node line is skipped
      (presumably the node id -- verify against the file writer); the next
      fields are float features. Nodes are identified by their row position
      (0, 1, 2, ...).
    * Edges file: header ``num_edges<TAB>num_edge_features<TAB>``, followed
      by one line per edge: integer sender, integer receiver, then any edge
      features.

    Only the first node feature is attached to the graph, as column ``"x"``.
    Edge features are not used by the returned graph and are not parsed.
    When the header declares zero node features, the graph is built with
    ``num_nodes`` featureless nodes. Blank lines are ignored.

    See https://stellargraph.readthedocs.io/en/stable/demos/basics/loading-numpy.html

    Args:
        filePrefix: Path prefix shared by the nodes and edges files.

    Returns:
        The ``StellarGraph`` built from the node table and the edge list.

    Raises:
        ValueError: If a header or numeric field cannot be parsed, or if a
            file contains fewer data rows than its header declares.
        IndexError: If a file contains more data rows than its header
            declares, or a node row has fewer feature fields than declared.
    """
    nodes_filename = filePrefix + "_nodes.txt"
    edges_filename = filePrefix + "_edges.txt"

    node_features = None
    with open(nodes_filename) as f:
        num_nodes, num_node_features = _read_counts(f)
        if num_node_features > 0:
            # Feature-major layout: row k holds feature k of every node.
            node_features = np.zeros((num_node_features, num_nodes))
            rows_read = 0
            for i, fields in enumerate(_data_rows(f)):
                values = np.array([float(v) for v in fields[1:-1]])
                if values.size < num_node_features:
                    raise IndexError(
                        f"{nodes_filename}: node row {i} has {values.size} "
                        f"feature(s), expected {num_node_features}"
                    )
                node_features[:, i] = values[:num_node_features]
                rows_read = i + 1
            if rows_read != num_nodes:
                raise ValueError(
                    f"{nodes_filename}: header declares {num_nodes} nodes "
                    f"but only {rows_read} row(s) were found"
                )

    with open(edges_filename) as f:
        num_edges, _num_edge_features = _read_counts(f)
        senders = np.zeros(num_edges, dtype=int)
        receivers = np.zeros(num_edges, dtype=int)
        rows_read = 0
        for i, fields in enumerate(_data_rows(f)):
            senders[i] = int(fields[0])
            receivers[i] = int(fields[1])
            rows_read = i + 1
        # Unfilled slots would otherwise remain as phantom 0 -> 0 edges.
        if rows_read != num_edges:
            raise ValueError(
                f"{edges_filename}: header declares {num_edges} edges "
                f"but only {rows_read} row(s) were found"
            )

    square_numeric_edges = pd.DataFrame({
        "source": senders,
        "target": receivers
    })
    if node_features is None:
        # No features declared: keep the nodes, identified by 0..num_nodes-1,
        # with an empty feature table.
        square_node_data = pd.DataFrame(index=pd.RangeIndex(num_nodes))
    else:
        square_node_data = pd.DataFrame({"x": node_features[0].tolist()})

    square_numeric = StellarGraph(square_node_data, edges=square_numeric_edges)

    print("GRAPH INFO")
    print("....................................")
    print(square_numeric.info())
    print("....................................")

    return square_numeric
Esempio n. 3
0
import utils_ctdne as utils

from stellargraph import StellarGraph
from stellargraph.data import EdgeSplitter
from sklearn.model_selection import train_test_split

# Load the edge list and build the initial graph G.
# Alternative dataset: filename_edges = "Datasets/Cora/cora_cites.csv" with wt = False
filename_edges = "Datasets/soc-sign-bitcoinotc/soc-sign-bitcoinotc-temporal.csv"
wt = True

edgelist_df = utils.load_data(filename_edges, weighted=wt)

G = StellarGraph(edges=edgelist_df)
print("\n", G.info())
print("Created master graph from data")

# Edge splitter defined on the original graph.
edge_splitter_test = EdgeSplitter(G)

# Randomly sample a fraction p=0.1 of all positive links, plus the same
# number of negative links, from the graph. G_test is the reduced graph with
# the sampled links removed; it is used to compute node embeddings with more
# edges than G_train. examples_test / labels_test are the sampled links and
# their labels.
G_test, examples_test, labels_test = edge_splitter_test.train_test_split(
    p=0.1,
    method="global",
)