Exemple #1
0
 def process(self):
     """Convert the pickled split into collated PyG data and save it.

     Reads ``<root>/<name>_<split>.pkl``, builds one ``Data`` object per
     sample, collates them with ``self.collate`` and writes the resulting
     ``(data, slices)`` tuple to ``self.processed_paths[0]``.
     """
     pickle_name = self.name + '_%s.pkl' % self.split
     pickle_path = os.path.join(self.root, pickle_name)
     with open(pickle_path, 'rb') as handle:
         self.dataset = pickle.load(handle)

     print("preparing graphs for the %s set..." % (self.split.upper()))
     print('Converting graphs into PyG objects...')

     # One constant feature per edge (used by the Residual Gated ConvNet).
     edge_feat_dim = 1
     converted = []
     for sample in tqdm(self.dataset):
         feats = sample.node_feat

         pyg = Data()
         pyg.__num_nodes__ = feats.size(0)
         # Non-zero entries of W (presumably the adjacency matrix) become
         # the edge list; transpose so the two index rows come first.
         pyg.edge_index = (sample.W != 0).nonzero().T
         pyg.x = feats.long()
         pyg.edge_attr = torch.ones(pyg.num_edges, edge_feat_dim)
         pyg.y = sample.node_label.to(torch.float32)
         converted.append(pyg)

     # The raw samples are no longer needed once converted.
     del self.dataset

     collated, slices = self.collate(converted)
     print('Saving...')
     torch.save((collated, slices), self.processed_paths[0])
Exemple #2
0
def read_csv_graph_pyg(raw_dir, add_inverse_edge=True, additional_node_files=None, additional_edge_files=None):
    """Read CSV graphs from ``raw_dir`` and convert them into PyG ``Data`` objects.

    Args:
        raw_dir: Directory passed through to ``read_csv_graph_raw``.
        add_inverse_edge: Forwarded unchanged to ``read_csv_graph_raw``.
        additional_node_files: Extra keys to copy from each raw graph onto
            the ``Data`` object. ``None`` (the default) means no extra keys.
        additional_edge_files: Same as ``additional_node_files``, listed
            separately for edge-level data.

    Returns:
        A list with one ``Data`` object per raw graph.
    """
    # Use fresh lists per call: a mutable ``[]`` default would be a single
    # shared object that is also handed to ``read_csv_graph_raw``.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_graph_raw(
        raw_dir,
        add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)
    pyg_graph_list = []

    print('Converting graphs into PyG objects...')
    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph["num_nodes"]
        g.edge_index = torch.tensor(graph["edge_index"])

        # Features are optional; only attach the ones that are present.
        if graph["edge_feat"] is not None:
            g.edge_attr = torch.tensor(graph["edge_feat"])

        if graph["node_feat"] is not None:
            g.x = torch.tensor(graph["node_feat"])

        for key in additional_node_files:
            g[key] = torch.tensor(graph[key])

        for key in additional_edge_files:
            g[key] = torch.tensor(graph[key])

        pyg_graph_list.append(g)

    return pyg_graph_list
Exemple #3
0
def read_graph_pyg(raw_dir,
                   add_inverse_edge=False,
                   additional_node_files=None,
                   additional_edge_files=None,
                   binary=False):
    """Read raw graphs and convert them into PyG ``Data`` objects with order info.

    Each raw graph dictionary is consumed while converting: the entries
    that were copied onto the ``Data`` object are deleted from it.

    Args:
        raw_dir: Directory passed through to the raw reader.
        add_inverse_edge: Forwarded unchanged to the raw reader.
        additional_node_files: Extra keys to move from each raw graph onto
            the ``Data`` object. ``None`` (the default) means no extra keys.
        additional_edge_files: Same as ``additional_node_files``, listed
            separately for edge-level data.
        binary: If true, read with ``read_binary_graph_raw``; otherwise
            read with ``read_csv_graph_raw``.

    Returns:
        A list with one ``Data`` object per raw graph, each carrying a
        ``len_longest_path`` float attribute.
    """
    # Use fresh lists per call: a mutable ``[]`` default would be a single
    # shared object that is also handed to the raw reader.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    if binary:
        # npz
        graph_list = read_binary_graph_raw(raw_dir, add_inverse_edge)
    else:
        # csv
        graph_list = read_csv_graph_raw(
            raw_dir,
            add_inverse_edge,
            additional_node_files=additional_node_files,
            additional_edge_files=additional_edge_files)

    pyg_graph_list = []

    print('Converting graphs into PyG objects...')

    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph['num_nodes']
        g.edge_index = torch.from_numpy(graph['edge_index'])

        # Drop each raw entry once it has been wrapped in a tensor.
        del graph['num_nodes']
        del graph['edge_index']

        if graph['edge_feat'] is not None:
            g.edge_attr = torch.from_numpy(graph['edge_feat'])
            del graph['edge_feat']

        if graph['node_feat'] is not None:
            g.x = torch.from_numpy(graph['node_feat'])
            del graph['node_feat']

        for key in additional_node_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        for key in additional_edge_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        pyg_graph_list.append(g)

        # DAGNN: presumably populates ``g._bi_layer_idx0``, which is read
        # on the next statement -- confirm against add_order_info_01.
        add_order_info_01(g)
        # length of longest path
        # layer ids start with 0 so max, gives actual path length and -1 is not necessary
        g.len_longest_path = float(torch.max(g._bi_layer_idx0).item())

    return pyg_graph_list
Exemple #4
0
    def __getitem__(self, idx):
        """Build the PyG ``Data`` graph for the entry at position ``idx``.

        The entry's second element is unpacked but not attached to the
        returned object; only the graph derived from the SMILES string is.
        """
        data = Data()
        smiles, _unused_target = self.smiles_list[idx]
        graph = self.smiles2graph(smiles)

        def as_int64(key):
            # Wrap the numpy array stored under ``key`` as an int64 tensor.
            return torch.from_numpy(graph[key]).to(torch.int64)

        data.__num_nodes__ = int(graph['num_nodes'])
        data.edge_index = as_int64('edge_index')
        data.edge_attr = as_int64('edge_feat')
        data.x = as_int64('node_feat')

        return data
def read_graph_pyg(raw_dir,
                   add_inverse_edge=False,
                   additional_node_files=None,
                   additional_edge_files=None,
                   binary=False):
    """Read raw graphs from ``raw_dir`` and convert them into PyG ``Data`` objects.

    Each raw graph dictionary is consumed while converting: the entries
    that were copied onto the ``Data`` object are deleted from it.

    Args:
        raw_dir: Directory passed through to the raw reader.
        add_inverse_edge: Forwarded unchanged to the raw reader.
        additional_node_files: Extra keys to move from each raw graph onto
            the ``Data`` object. ``None`` (the default) means no extra keys.
        additional_edge_files: Same as ``additional_node_files``, listed
            separately for edge-level data.
        binary: If true, read with ``read_binary_graph_raw``; otherwise
            read with ``read_csv_graph_raw``.

    Returns:
        A list with one ``Data`` object per raw graph.
    """
    # Use fresh lists per call: a mutable ``[]`` default would be a single
    # shared object that is also handed to the raw reader.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    if binary:
        # npz
        graph_list = read_binary_graph_raw(raw_dir, add_inverse_edge)
    else:
        # csv
        graph_list = read_csv_graph_raw(
            raw_dir,
            add_inverse_edge,
            additional_node_files=additional_node_files,
            additional_edge_files=additional_edge_files)

    pyg_graph_list = []

    print('Converting graphs into PyG objects...')

    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph['num_nodes']
        g.edge_index = torch.from_numpy(graph['edge_index'])

        # Drop each raw entry once it has been wrapped in a tensor.
        del graph['num_nodes']
        del graph['edge_index']

        if graph['edge_feat'] is not None:
            g.edge_attr = torch.from_numpy(graph['edge_feat'])
            del graph['edge_feat']

        if graph['node_feat'] is not None:
            g.x = torch.from_numpy(graph['node_feat'])
            del graph['node_feat']

        for key in additional_node_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        for key in additional_edge_files:
            g[key] = torch.from_numpy(graph[key])
            del graph[key]

        pyg_graph_list.append(g)

    return pyg_graph_list
Exemple #6
0
    def process(self):
        """Convert ``data.csv.gz`` into collated PyG data and save it.

        Reads the ``smiles`` and ``homolumogap`` columns, turns every SMILES
        string into a ``Data`` graph, checks that the target is present for
        the train/valid splits and NaN for the two test splits, applies the
        optional ``pre_transform`` and stores ``(data, slices)`` at
        ``self.processed_paths[0]``.
        """
        csv_path = osp.join(self.raw_dir, 'data.csv.gz')
        data_df = pd.read_csv(csv_path)
        smiles_list = data_df['smiles']
        homolumogap_list = data_df['homolumogap']

        print('Converting SMILES strings into graphs...')
        data_list = []
        for i in tqdm(range(len(smiles_list))):
            data = Data()

            smiles, homolumogap = smiles_list[i], homolumogap_list[i]
            graph = self.smiles2graph(smiles)

            # Feature arrays must line up with the edge / node counts.
            assert (len(graph['edge_feat']) == graph['edge_index'].shape[1])
            assert (len(graph['node_feat']) == graph['num_nodes'])

            data.__num_nodes__ = int(graph['num_nodes'])
            for attr, key in (('edge_index', 'edge_index'),
                              ('edge_attr', 'edge_feat'),
                              ('x', 'node_feat')):
                setattr(data, attr,
                        torch.from_numpy(graph[key]).to(torch.int64))
            data.y = torch.Tensor([homolumogap])

            data_list.append(data)

        # double-check prediction target
        split_dict = self.get_idx_split()

        def target_is_nan(i):
            return torch.isnan(data_list[i].y)[0]

        # Labelled splits must have a target everywhere ...
        for labelled in ('train', 'valid'):
            assert (all(
                [not target_is_nan(i) for i in split_dict[labelled]]))
        # ... while the test splits must have it hidden (NaN) everywhere.
        for hidden in ('test-dev', 'test-challenge'):
            assert (all([target_is_nan(i) for i in split_dict[hidden]]))

        if self.pre_transform is not None:
            data_list = [self.pre_transform(item) for item in data_list]

        collated, slices = self.collate(data_list)

        print('Saving...')
        torch.save((collated, slices), self.processed_paths[0])
Exemple #7
0
def read_csv_graph_pyg(raw_dir,
                       add_inverse_edge=True,
                       additional_node_files=None,
                       additional_edge_files=None):
    """Read CSV graphs and convert them into PyG ``Data`` objects with order info.

    Args:
        raw_dir: Directory passed through to ``read_csv_graph_raw``.
        add_inverse_edge: Forwarded unchanged to ``read_csv_graph_raw``.
        additional_node_files: Extra node-level names to copy. A name that
            does not contain ``'node_'`` is looked up (and stored) with a
            ``'node_'`` prefix. ``None`` (the default) means no extras.
        additional_edge_files: Extra edge-level names to copy, handled the
            same way with an ``'edge_'`` prefix.

    Returns:
        A list with one ``Data`` object per raw graph, each carrying a
        ``len_longest_path`` float attribute.
    """
    # Use fresh lists per call: a mutable ``[]`` default would be a single
    # shared object that is also handed to ``read_csv_graph_raw``.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_graph_raw(
        raw_dir,
        add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)
    pyg_graph_list = []

    print('Converting graphs into PyG objects...')

    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph["num_nodes"]
        g.edge_index = torch.from_numpy(graph["edge_index"])

        # Features are optional; only attach the ones that are present.
        if graph["edge_feat"] is not None:
            g.edge_attr = torch.from_numpy(graph["edge_feat"])

        if graph["node_feat"] is not None:
            g.x = torch.from_numpy(graph["node_feat"])

        for key in additional_node_files:
            # Substring test (not a prefix test), kept as in the lookup key.
            if 'node_' not in key:
                feat_name = 'node_' + key
            else:
                feat_name = key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        for key in additional_edge_files:
            if 'edge_' not in key:
                feat_name = 'edge_' + key
            else:
                feat_name = key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        # DAGNN: presumably populates ``g._bi_layer_idx0``, which is read
        # on the next statement -- confirm against add_order_info_01.
        add_order_info_01(g)
        # length of longest path
        # layer ids start with 0 so max, gives actual path length and -1 is not necessary
        g.len_longest_path = float(torch.max(g._bi_layer_idx0).item())

        pyg_graph_list.append(g)

    return pyg_graph_list
Exemple #8
0
def read_csv_graph_pyg(raw_dir, add_inverse_edge=False):
    """Read CSV graphs from ``raw_dir`` and return them as PyG ``Data`` objects.

    ``add_inverse_edge`` is forwarded unchanged to ``read_csv_graph_raw``.
    Edge and node features are attached only when the raw graph has them.
    """
    converted = []

    for raw in read_csv_graph_raw(raw_dir, add_inverse_edge):
        pyg = Data()
        pyg.__num_nodes__ = raw["num_nodes"]
        pyg.edge_index = torch.tensor(raw["edge_index"])

        edge_feat = raw["edge_feat"]
        if edge_feat is not None:
            pyg.edge_attr = torch.tensor(edge_feat)

        node_feat = raw["node_feat"]
        if node_feat is not None:
            pyg.x = torch.tensor(node_feat)

        converted.append(pyg)

    return converted
Exemple #9
0
def read_csv_heterograph_pyg(raw_dir, add_inverse_edge=False, additional_node_files=None, additional_edge_files=None):
    """Read CSV heterogeneous graphs and convert them into PyG ``Data`` objects.

    Per-type information is stored on each ``Data`` object as dictionaries
    (``num_nodes_dict``, ``edge_index_dict``, and, when present,
    ``edge_attr_dict`` / ``x_dict``), with numpy arrays wrapped as tensors.

    Args:
        raw_dir: Directory passed through to ``read_csv_heterograph_raw``.
        add_inverse_edge: Forwarded unchanged to the raw reader.
        additional_node_files: Extra keys whose per-node-type dictionaries
            are stored under ``<key>_dict``. ``None`` (the default) means
            no extra keys.
        additional_edge_files: Extra keys whose per-triplet dictionaries are
            stored under ``<key>_dict``. ``None`` means no extra keys.

    Returns:
        A list with one ``Data`` object per raw graph.
    """
    # Use fresh lists per call: a mutable ``[]`` default would be a single
    # shared object that is also handed to ``read_csv_heterograph_raw``.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_heterograph_raw(
        raw_dir,
        add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)
    pyg_graph_list = []

    print('Converting graphs into PyG objects...')

    for graph in tqdm(graph_list):
        g = Data()

        # Note: ``__num_nodes__`` receives the per-type dictionary here,
        # not a single integer.
        g.__num_nodes__ = graph["num_nodes_dict"]
        g.num_nodes_dict = graph["num_nodes_dict"]

        # add edge connectivity
        g.edge_index_dict = {
            triplet: torch.from_numpy(edge_index)
            for triplet, edge_index in graph["edge_index_dict"].items()
        }

        if graph["edge_feat_dict"] is not None:
            g.edge_attr_dict = {
                triplet: torch.from_numpy(feat)
                for triplet, feat in graph["edge_feat_dict"].items()
            }

        if graph["node_feat_dict"] is not None:
            g.x_dict = {
                nodetype: torch.from_numpy(feat)
                for nodetype, feat in graph["node_feat_dict"].items()
            }

        for key in additional_edge_files:
            g[key + '_dict'] = {
                triplet: torch.from_numpy(value)
                for triplet, value in graph[key].items()
            }

        for key in additional_node_files:
            g[key + '_dict'] = {
                nodetype: torch.from_numpy(value)
                for nodetype, value in graph[key].items()
            }

        pyg_graph_list.append(g)

    return pyg_graph_list
Exemple #10
0
def read_csv_graph_pyg(raw_dir,
                       add_inverse_edge=True,
                       additional_node_files=None,
                       additional_edge_files=None):
    """Read CSV graphs from ``raw_dir`` and convert them into PyG ``Data`` objects.

    Args:
        raw_dir: Directory passed through to ``read_csv_graph_raw``.
        add_inverse_edge: Forwarded unchanged to ``read_csv_graph_raw``.
        additional_node_files: Extra node-level names to copy. A name that
            does not contain ``'node_'`` is looked up (and stored) with a
            ``'node_'`` prefix. ``None`` (the default) means no extras.
        additional_edge_files: Extra edge-level names to copy, handled the
            same way with an ``'edge_'`` prefix.

    Returns:
        A list with one ``Data`` object per raw graph.
    """
    # Use fresh lists per call: a mutable ``[]`` default would be a single
    # shared object that is also handed to ``read_csv_graph_raw``.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_graph_raw(
        raw_dir,
        add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)
    pyg_graph_list = []

    print('Converting graphs into PyG objects...')

    for graph in tqdm(graph_list):
        g = Data()
        g.__num_nodes__ = graph["num_nodes"]
        g.edge_index = torch.from_numpy(graph["edge_index"])

        # Features are optional; only attach the ones that are present.
        if graph["edge_feat"] is not None:
            g.edge_attr = torch.from_numpy(graph["edge_feat"])

        if graph["node_feat"] is not None:
            g.x = torch.from_numpy(graph["node_feat"])

        for key in additional_node_files:
            # Substring test (not a prefix test), kept as in the lookup key.
            if 'node_' not in key:
                feat_name = 'node_' + key
            else:
                feat_name = key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        for key in additional_edge_files:
            if 'edge_' not in key:
                feat_name = 'edge_' + key
            else:
                feat_name = key
            g[feat_name] = torch.from_numpy(graph[feat_name])

        pyg_graph_list.append(g)

    return pyg_graph_list
Exemple #11
0
def read_csv_graph_pyg(raw_dir, add_inverse_edge=False):
    """Read CSV graphs from ``raw_dir`` and return them as PyG ``Data`` objects.

    ``add_inverse_edge`` is forwarded unchanged to ``read_csv_graph_raw``.
    Conversion progress is reported through ``tqdm``.
    """
    raw_graphs = read_csv_graph_raw(raw_dir, add_inverse_edge)
    converted = []

    # (Data attribute, raw-graph key) for the optional feature arrays, in
    # the order they are attached.
    optional_fields = (("edge_attr", "edge_feat"), ("x", "node_feat"))

    print('Converting graphs into PyG objects...')
    for raw in tqdm(raw_graphs):
        pyg = Data()
        pyg.__num_nodes__ = raw["num_nodes"]
        pyg.edge_index = torch.tensor(raw["edge_index"])

        for attr, key in optional_fields:
            if raw[key] is not None:
                setattr(pyg, attr, torch.tensor(raw[key]))

        converted.append(pyg)

    return converted
Exemple #12
0
def graphs_to_pyg(graph_list):
    """Convert raw graph dictionaries into PyG ``Data`` objects.

    Edge and node features, when present, are stored as double tensors.
    A non-``None`` ``target`` is cast to ``int`` and stored as a
    one-element long tensor in ``y``, with ``-1`` remapped to ``0``.
    """
    converted = []
    print('Converting graphs into PyG objects...')
    for raw in tqdm(graph_list):
        pyg = Data()
        pyg.__num_nodes__ = raw["num_nodes"]
        pyg.edge_index = torch.tensor(raw["edge_index"])

        edge_feat = raw["edge_feat"]
        if edge_feat is not None:
            pyg.edge_attr = torch.DoubleTensor(edge_feat)

        node_feat = raw["node_feat"]
        if node_feat is not None:
            pyg.x = torch.DoubleTensor(node_feat)

        target = raw["target"]
        if target is not None:
            label = int(target)
            if label == -1:
                label = 0
            pyg.y = torch.LongTensor([label])

        converted.append(pyg)

    return converted