def get_nonzero_feature_indices(self, node_idx):
    """
    Locates the nonzero entries of one node's feature vector and re-expresses
    them relative to the feature vector segments they fall in (used when turning
    a feature vector back into an `rdkit.Atom` object).

    Args:
      node_idx (int) : Index of the node in `self.node_features` to inspect.

    Returns:
      segment_idc (list) : The first nonzero index unchanged, followed by every
        later nonzero index minus the corresponding offset returned by
        `util.get_feature_vector_indices()` (the k-th later index is shifted by
        the k-th offset). Per the original documentation these describe the atom
        type, formal charge, number of implicit Hs, and chirality of the node.

    Raises:
      IndexError : If the feature vector has no nonzero entry, or has more
        nonzero entries after the first than there are offsets.
    """
    # **note: "idx" == "index", "idc" == "indices"
    segment_offsets = util.get_feature_vector_indices()
    nonzero_idc = np.nonzero(self.node_features[node_idx])[0]

    # the first segment starts at position 0, so its index needs no correction;
    # every later index is shifted back by the offset at the same position to
    # undo the concatenation of the segments
    first_idx, remaining_idc = nonzero_idc[0], nonzero_idc[1:]
    return [first_idx] + [
        raw_idx - segment_offsets[position]
        for position, raw_idx in enumerate(remaining_idc)
    ]
def get_node_feature_distribution(molecular_graphs):
    """
    Computes un-normalized histograms of the atom types, formal charges, number
    of implicit Hs, and chiral states present in the input `molecular_graphs`.

    The node feature matrices of all graphs are summed along their first axis,
    and the summed vector is split into one slice per property using the offsets
    returned by `util.get_feature_vector_indices()`.

    Args:
      molecular_graphs (list) : Non-empty `list` of `MolecularGraph`s, each
        exposing a `node_features` matrix (`torch.Tensor` or array-like).

    Returns:
      tuple : `(atom_type_histogram, formal_charge_histogram, numh_histogram,
        chirality_histogram)`. The first two are slices of the summed feature
        vector (a `torch.Tensor` if the first graph holds tensor features, else
        a `numpy.ndarray`). `numh_histogram` and `chirality_histogram` are
        slices as well when the respective feature is enabled in `C`; otherwise
        they are plain `list`s of zeros of length `C.n_imp_H` and
        `C.n_chirality`, respectively.

    Raises:
      IndexError : If `molecular_graphs` is empty.
    """
    # sum up all node feature vectors to get an un-normalized histogram
    first_features = molecular_graphs[0].node_features
    if isinstance(first_features, torch.Tensor):
        # allocate the accumulator on the device the features live on rather
        # than a hard-coded "cuda", so this also works on CPU-only machines
        nodes_hist = torch.zeros(C.n_node_features,
                                 device=first_features.device)
    else:
        nodes_hist = np.zeros(C.n_node_features)

    # loop over all the node feature matrices of the input graphs
    for molecular_graph in molecular_graphs:
        node_features = molecular_graph.node_features
        if isinstance(node_features, torch.Tensor):
            nodes_hist += torch.sum(node_features, dim=0)
        else:
            nodes_hist += np.sum(node_features, axis=0)

    idc = util.get_feature_vector_indices()  # **note: "idc" == "indices"

    # split up `nodes_hist` into atom types hist, formal charge hist, etc
    # `atom_type_histogram` and `formal_charge_histogram` are calculated by
    # default, and if specified, also `numh_histogram` and `chirality_histogram`
    atom_type_histogram = nodes_hist[:idc[0]]
    formal_charge_histogram = nodes_hist[idc[0]:idc[1]]

    # the implicit-H segment only exists when Hs are neither explicit nor ignored
    has_imp_h_segment = not C.use_explicit_H and not C.ignore_H

    if has_imp_h_segment:
        numh_histogram = nodes_hist[idc[1]:idc[2]]
    else:
        numh_histogram = [0] * C.n_imp_H

    if C.use_chirality:
        # the chirality segment sits one slot later if the implicit-H segment
        # is present in the feature vector
        correction = int(has_imp_h_segment)
        chirality_histogram = nodes_hist[idc[1 + correction]:idc[2 + correction]]
    else:
        chirality_histogram = [0] * C.n_chirality

    return (atom_type_histogram, formal_charge_histogram, numh_histogram,
            chirality_histogram)
def _get_node_feature_distribution(molecular_graphs : list) -> \
        Tuple[Union[torch.Tensor, np.ndarray, list], ...]:
    """
    Computes un-normalized histograms of the atom types, formal charges, number
    of implicit Hs, and chiral states present in the input `molecular_graphs`.

    The node feature matrices of all graphs are summed along their first axis,
    and the summed vector is split into one slice per property using the offsets
    returned by `util.get_feature_vector_indices()`.

    Args:
      molecular_graphs (list) : Non-empty `list` of graphs, each exposing a
        `node_features` matrix (`torch.Tensor` or array-like).

    Returns:
      tuple : `(atom_type_histogram, formal_charge_histogram, numh_histogram,
        chirality_histogram)`. The first two are slices of the summed feature
        vector (a `torch.Tensor` allocated on `constants.device` if the first
        graph holds tensor features, else a `numpy.ndarray`). `numh_histogram`
        and `chirality_histogram` are slices as well when the respective
        feature is enabled in `constants`; otherwise they are plain `list`s of
        zeros of length `constants.n_imp_H` and `constants.n_chirality`.

    Raises:
      IndexError : If `molecular_graphs` is empty.
    """
    # sum up all node feature vectors to get an un-normalized histogram
    # (`isinstance` so that `torch.Tensor` subclasses also get a tensor
    # accumulator instead of silently falling through to NumPy)
    if isinstance(molecular_graphs[0].node_features, torch.Tensor):
        nodes_hist = torch.zeros(constants.n_node_features,
                                 device=constants.device)
    else:
        nodes_hist = np.zeros(constants.n_node_features)

    # loop over all the node feature matrices of the input graphs, picking the
    # summation routine from the feature type rather than from a caught
    # `TypeError`
    for molecular_graph in molecular_graphs:
        node_features = molecular_graph.node_features
        if isinstance(node_features, torch.Tensor):
            nodes_hist += torch.sum(node_features, dim=0)
        else:
            nodes_hist += np.sum(node_features, axis=0)

    idc = util.get_feature_vector_indices()  # **note: "idc" == "indices"

    # split up `nodes_hist` into atom types hist, formal charge hist, etc
    atom_type_histogram = nodes_hist[:idc[0]]
    formal_charge_histogram = nodes_hist[idc[0]:idc[1]]

    # the implicit-H segment only exists when Hs are neither explicit nor ignored
    has_imp_h_segment = (not constants.use_explicit_H
                         and not constants.ignore_H)

    if has_imp_h_segment:
        numh_histogram = nodes_hist[idc[1]:idc[2]]
    else:
        numh_histogram = [0] * constants.n_imp_H

    if constants.use_chirality:
        # the chirality segment sits one slot later if the implicit-H segment
        # is present in the feature vector
        correction = int(has_imp_h_segment)
        chirality_histogram = nodes_hist[
            idc[1 + correction]:idc[2 + correction]]
    else:
        chirality_histogram = [0] * constants.n_chirality

    return (atom_type_histogram, formal_charge_histogram, numh_histogram,
            chirality_histogram)