Example #1
    all_features_pad = [
        pad(feat, (max_nodes, feat.shape[1])) for feat in all_link_features
    ]

    def create_mask(feat, max_nodes):
        # True for real nodes, False for padded rows.
        return np.arange(max_nodes) < feat.shape[0]

    all_masks = [create_mask(feat, max_nodes) for feat in all_link_features]
    num_channels = all_features_pad[0].shape[1]

    # Step 3: create the dataset objects (iterating std_dict yields its keys)
    data = [
        Data(adj=torch.from_numpy(adj).float(),
             mask=torch.from_numpy(mask),
             x=torch.from_numpy(x[:, :num_channels]).float(),
             y=torch.from_numpy(np.array([y])).float(),
             std=torch.from_numpy(np.array([std_dict[key]])).float())
        for adj, mask, x, y, key in zip(all_link_adj_symmetric_pad, all_masks,
                                        all_features_pad, all_rewards, std_dict)
    ]
    import random
    random.shuffle(data)

    n = (len(data) + 9) // 10  # ceil(len / 10): hold out ~10% each for test and val
    test_dataset = data[:n]
    val_dataset = data[n:2 * n]
    train_dataset = data[2 * n:]

    with open('test_loader', 'wb') as test_file:
        pickle.dump(test_dataset, test_file)
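The padding pattern above generalizes to any ragged batch of node feature matrices. A self-contained sketch of the same idea (the pad helper and toy shapes here are illustrative assumptions, not taken from the example above):

import numpy as np

def pad(feat, shape):
    # Zero-pad an (n, c) feature matrix up to (max_nodes, c).
    out = np.zeros(shape, dtype=feat.dtype)
    out[:feat.shape[0], :feat.shape[1]] = feat
    return out

features = [np.ones((2, 4)), np.ones((5, 4))]  # ragged node counts
max_nodes = max(f.shape[0] for f in features)

padded = [pad(f, (max_nodes, f.shape[1])) for f in features]
masks = [np.arange(max_nodes) < f.shape[0] for f in features]

assert padded[0].shape == (5, 4)
assert masks[0].tolist() == [True, True, False, False, False]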
Example #2
    def load_one_graph(self, fname, mol):
        """Loads one graph

        Args:
            fname (str): hdf5 file name
            mol (str): name of the molecule

        Returns:
            Data object or None: torch_geometric Data object containing
                the node features, the internal and external edge features,
                the target and the xyz coordinates. Returns None if the
                features cannot be loaded.
        """
        f5 = h5py.File(fname, 'r')
        try:
            grp = f5[mol]
        except KeyError:
            f5.close()
            return None

        # nodes
        data = ()
        try:
            for feat in self.node_feature:
                vals = grp['node_data/'+feat][()]
                if vals.ndim == 1:
                    vals = vals.reshape(-1, 1)
                data += (vals,)
            x = torch.tensor(np.hstack(data), dtype=torch.float)

        except Exception:
            print('node attributes not found in the file',
                  self.database[0])
            f5.close()
            return None

        try:
            # edge index: we need both directions of each edge, i.e. (i, j) and (j, i)
            ind = grp['edge_index'][()]
            ind = np.vstack((ind, np.flip(ind, 1))).T
            edge_index = torch.tensor(
                ind, dtype=torch.long).contiguous()

            # edge features (doubled in the same way as the edge index)
            data = ()
            if self.edge_feature is not None:
                for feat in self.edge_feature:
                    vals = grp['edge_data/'+feat][()]
                    if vals.ndim == 1:
                        vals = vals.reshape(-1, 1)
                    data += (vals,)
                data = np.hstack(data)
                data = np.vstack((data, data))
                data = self.edge_feature_transform(data)
                edge_attr = torch.tensor(
                    data, dtype=torch.float).contiguous()

            else:
                edge_attr = None

            # internal edges
            ind = grp['internal_edge_index'][()]
            ind = np.vstack((ind, np.flip(ind, 1))).T
            internal_edge_index = torch.tensor(
                ind, dtype=torch.long).contiguous()

            # internal edge feature
            data = ()
            if self.edge_feature is not None:
                for feat in self.edge_feature:
                    vals = grp['internal_edge_data/'+feat][()]
                    if vals.ndim == 1:
                        vals = vals.reshape(-1, 1)
                    data += (vals,)
                data = np.hstack(data)
                data = np.vstack((data, data))
                data = self.edge_feature_transform(data)
                internal_edge_attr = torch.tensor(
                    data, dtype=torch.float).contiguous()

            else:
                internal_edge_attr = None

        except Exception:
            print('edge features not found in the file',
                  self.database[0])
            f5.close()
            return None

        # target
        if self.target is None:
            y = None
        elif self.target in grp['score/'].keys():
            val = grp['score/' + self.target][()]
            y = (torch.tensor([val], dtype=torch.float).contiguous()
                 if val is not None else None)
        else:
            y = None
        # xyz coordinates
        pos = torch.tensor(grp['node_data/pos'][()],
                           dtype=torch.float).contiguous()

        # assemble the torch_geometric Data object
        data = Data(x=x,
                    edge_index=edge_index,
                    edge_attr=edge_attr,
                    y=y,
                    pos=pos)

        data.internal_edge_index = internal_edge_index
        data.internal_edge_attr = internal_edge_attr

        # mol name
        data.mol = mol

        # cluster indices (only if pre-computed with the requested method)
        method = self.clustering_method
        if ('clustering' in grp.keys()
                and method in grp['clustering'].keys()
                and 'depth_0' in grp['clustering/' + method].keys()
                and 'depth_1' in grp['clustering/' + method].keys()):
            data.cluster0 = torch.tensor(
                grp['clustering/' + method + '/depth_0'][()], dtype=torch.long)
            data.cluster1 = torch.tensor(
                grp['clustering/' + method + '/depth_1'][()], dtype=torch.long)
        else:
            print('WARNING: no cluster detected')

        f5.close()
        return data
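The returned object can be reproduced by hand, which makes its layout easier to see: Data accepts arbitrary extra attributes, so internal_edge_index and internal_edge_attr attach like any built-in field. A minimal sketch with made-up shapes (the molecule name is hypothetical):

import torch
from torch_geometric.data import Data

x = torch.rand(4, 3)                         # 4 nodes, 3 node features
edge_index = torch.tensor([[0, 1],           # sources
                           [1, 0]])          # targets: both (i, j) and (j, i)
edge_attr = torch.rand(2, 2)                 # one row per directed edge
pos = torch.rand(4, 3)                       # xyz coordinates
y = torch.tensor([1.0])                      # target score

data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, pos=pos)
data.internal_edge_index = torch.tensor([[2, 3], [3, 2]])
data.internal_edge_attr = torch.rand(2, 2)
data.mol = '1ATN'                            # hypothetical molecule name

print(data)  # lists every attached tensor and its shape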
Example #3
            test_dataset = pickle.load(test_file)
            train_dataset = pickle.load(train_file)
            val_dataset = pickle.load(val_file)
    else:
        os.makedirs(dataset_dir, exist_ok=True)

        raw_dataset_path = os.path.join(current_dir, 'data',
                                        args.dataset_name + '.csv')

        all_features, all_link_adj, all_masks, all_rewards \
            = load_partial_design_data(raw_dataset_path, os.path.join(project_dir, 'data/designs/grammar_jan21.dot'))

        # Create dataset object
        data = [
            Data(adj=torch.from_numpy(adj).float(),
                 mask=torch.from_numpy(mask),
                 x=torch.from_numpy(x).float(),
                 y=torch.from_numpy(np.array([y])).float()) for adj, mask, x, y
            in zip(all_link_adj, all_masks, all_features, all_rewards)
        ]
        random.shuffle(data)

        n_val = (len(data) + 9) // 10
        n_test = (len(data) + 9) // 10
        train_dataset = data[:-n_test - n_val]
        val_dataset = data[-n_test - n_val:-n_test]
        test_dataset = data[-n_test:]

        with open(testset_path, 'wb') as test_file, open(
                valset_path, 'wb') as val_file, open(trainset_path,
                                                     'wb') as train_file:
            pickle.dump(test_dataset, test_file)
            pickle.dump(val_dataset, val_file)
            pickle.dump(train_dataset, train_file)
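The ceiling division above reserves roughly 10% of the data each for validation and test. The same arithmetic on a dummy list, as a standalone sketch:

import random

data = list(range(95))               # stand-in for the list of Data objects
random.shuffle(data)

n_val = (len(data) + 9) // 10        # ceil(95 / 10) = 10
n_test = (len(data) + 9) // 10

train = data[:-n_test - n_val]       # 75 items
val = data[-n_test - n_val:-n_test]  # 10 items
test = data[-n_test:]                # 10 items

assert len(train) + len(val) + len(test) == len(data)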
Example #4
    def __getitem__(self, idx):
        # Synthetic point cloud: random positions, features and targets per point.
        pos = torch.from_numpy(np.random.normal(0, 1, (self.num_points, 3)))
        y = torch.from_numpy(np.random.normal(0, 1, (self.num_points, self.output_nc)))
        x = torch.from_numpy(np.random.normal(0, 1, (self.num_points, self.input_nc)))
        return Data(x=x, pos=pos, y=y)
Example #5
    def test_num_batches(self):
        # DENSE layout: pos is (batch, num_points, 3), so dim 0 is the sample count.
        data = Data(pos=torch.randn((2, 3, 3)))
        self.assertEqual(MockBaseDataset.get_num_samples(data, ConvolutionFormat.DENSE.value), 2)

        # PARTIAL_DENSE layout: pos is flat (num_points, 3) with a batch vector.
        data = Data(pos=torch.randn((3, 3)), batch=torch.tensor([0, 1, 2]))
        self.assertEqual(MockBaseDataset.get_num_samples(data, ConvolutionFormat.PARTIAL_DENSE.value), 3)
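The PARTIAL_DENSE case relies on a batch vector that maps every point to its sample index; torch_geometric's Batch.from_data_list builds that vector automatically. A minimal sketch, independent of the MockBaseDataset helper used above:

import torch
from torch_geometric.data import Batch, Data

samples = [Data(pos=torch.randn(4, 3)), Data(pos=torch.randn(6, 3))]
batch = Batch.from_data_list(samples)

print(batch.batch)       # tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
print(batch.num_graphs)  # 2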
Example #6
    def to_homogeneous(self,
                       node_attrs: Optional[List[str]] = None,
                       edge_attrs: Optional[List[str]] = None,
                       add_node_type: bool = True,
                       add_edge_type: bool = True) -> Data:
        """Converts a :class:`~torch_geometric.data.HeteroData` object to a
        homogeneous :class:`~torch_geometric.data.Data` object.
        By default, all features with same feature dimensionality across
        different types will be merged into a single representation, unless
        otherwise specified via the :obj:`node_attrs` and :obj:`edge_attrs`
        arguments.
        Furthermore, attributes named :obj:`node_type` and :obj:`edge_type`
        will be added to the returned :class:`~torch_geometric.data.Data`
        object, denoting node-level and edge-level vectors holding the
        node and edge type as integers, respectively.

        Args:
            node_attrs (List[str], optional): The node features to combine
                across all node types. These node features need to be of the
                same feature dimensionality. If set to :obj:`None`, will
                automatically determine which node features to combine.
                (default: :obj:`None`)
            edge_attrs (List[str], optional): The edge features to combine
                across all edge types. These edge features need to be of the
                same feature dimensionality. If set to :obj:`None`, will
                automatically determine which edge features to combine.
                (default: :obj:`None`)
            add_node_type (bool, optional): If set to :obj:`False`, will not
                add the node-level vector :obj:`node_type` to the returned
                :class:`~torch_geometric.data.Data` object.
                (default: :obj:`True`)
            add_edge_type (bool, optional): If set to :obj:`False`, will not
                add the edge-level vector :obj:`edge_type` to the returned
                :class:`~torch_geometric.data.Data` object.
                (default: :obj:`True`)
        """
        def _consistent_size(stores: List[BaseStorage]) -> List[str]:
            sizes_dict = defaultdict(list)
            for store in stores:
                for key, value in store.items():
                    if key in ['edge_index', 'adj_t']:
                        continue
                    if isinstance(value, Tensor):
                        dim = self.__cat_dim__(key, value, store)
                        size = value.size()[:dim] + value.size()[dim + 1:]
                        sizes_dict[key].append(tuple(size))
            return [
                k for k, sizes in sizes_dict.items()
                if len(sizes) == len(stores) and len(set(sizes)) == 1
            ]

        edge_index, node_slices, edge_slices = to_homogeneous_edge_index(self)
        device = edge_index.device if edge_index is not None else None

        data = Data(**self._global_store.to_dict())
        if edge_index is not None:
            data.edge_index = edge_index
        data._node_type_names = list(node_slices.keys())
        data._edge_type_names = list(edge_slices.keys())

        # Combine node attributes into a single tensor:
        if node_attrs is None:
            node_attrs = _consistent_size(self.node_stores)
        for key in node_attrs:
            values = [store[key] for store in self.node_stores]
            dim = self.__cat_dim__(key, values[0], self.node_stores[0])
            value = torch.cat(values, dim) if len(values) > 1 else values[0]
            data[key] = value

        if not data.can_infer_num_nodes:
            data.num_nodes = list(node_slices.values())[-1][1]

        # Combine edge attributes into a single tensor:
        if edge_attrs is None:
            edge_attrs = _consistent_size(self.edge_stores)
        for key in edge_attrs:
            values = [store[key] for store in self.edge_stores]
            dim = self.__cat_dim__(key, values[0], self.edge_stores[0])
            value = torch.cat(values, dim) if len(values) > 1 else values[0]
            data[key] = value

        if add_node_type:
            sizes = [offset[1] - offset[0] for offset in node_slices.values()]
            sizes = torch.tensor(sizes, dtype=torch.long, device=device)
            node_type = torch.arange(len(sizes), device=device)
            data.node_type = node_type.repeat_interleave(sizes)

        if add_edge_type and edge_index is not None:
            sizes = [offset[1] - offset[0] for offset in edge_slices.values()]
            sizes = torch.tensor(sizes, dtype=torch.long, device=device)
            edge_type = torch.arange(len(sizes), device=device)
            data.edge_type = edge_type.repeat_interleave(sizes)

        return data
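A short usage sketch, assuming node features of matching dimensionality so that they can be merged:

import torch
from torch_geometric.data import HeteroData

data = HeteroData()
data['paper'].x = torch.randn(4, 16)
data['author'].x = torch.randn(2, 16)
data['author', 'writes', 'paper'].edge_index = torch.tensor([[0, 1],
                                                             [2, 3]])

homo = data.to_homogeneous()
print(homo.x.shape)     # torch.Size([6, 16])
print(homo.node_type)   # tensor([0, 0, 0, 0, 1, 1])
print(homo.edge_index)  # author indices are offset past the 4 paper nodes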