Example #1
0
    def get_nonzero_feature_indices(self, node_idx):
        """ Locates the nonzero entries of one node's feature vector and
        re-expresses them as positions local to each one-hot segment (used when
        turning a feature vector back into an `rdkit.Atom` object).

        Args:
          node_idx (int) : Row of `self.node_features` to inspect, i.e. the
            index of a specific node in the `PreprocessingGraph`.

        Returns:
          list : The first nonzero index unchanged, followed by every further
            nonzero index minus the entry of
            `util.get_feature_vector_indices()` at the matching position.
            Per the original documentation these describe the atom type,
            formal charge, number of implicit Hs, and chirality of the node.

        Raises:
          IndexError : If the selected feature vector has no nonzero entry.
        """
        # presumably the cumulative end offsets of the one-hot segments --
        # TODO confirm against `util.get_feature_vector_indices`
        segment_offsets = util.get_feature_vector_indices()

        # positions of all nonzero entries in the concatenated feature vector
        hot_positions = np.nonzero(self.node_features[node_idx])[0]

        # the first segment starts at position 0, so its index needs no shift;
        # every later index is shifted back by the offset paired with it
        leading = hot_positions[0]
        trailing = hot_positions[1:]
        return [leading] + [
            position - segment_offsets[k]
            for k, position in enumerate(trailing)
        ]
Example #2
0
def get_node_feature_distribution(molecular_graphs):
    """ Returns a `tuple` of un-normalized histograms for atom types, formal
    charges, number of implicit Hs, and chiral states that are present in the
    input `molecular_graphs`. The nth item of each histogram is the count of
    the nth property in `atom_types`, `formal_charge`, `imp_H`, and
    `chirality`, respectively.

    Args:
      molecular_graphs (list) : Non-empty `list` of `MolecularGraph`s, each
        exposing a `node_features` matrix (a `torch.Tensor` or something
        `np.sum` accepts). The first graph decides whether the histogram is
        accumulated as a `torch.Tensor` or as a `np.ndarray`.

    Returns:
      tuple : `(atom_type_histogram, formal_charge_histogram, numh_histogram,
        chirality_histogram)`. The first two are slices of the summed feature
        vector. The last two are slices as well when the respective feature is
        enabled in `C`, and otherwise plain `list`s of zeros of length
        `C.n_imp_H` and `C.n_chirality`.

    Raises:
      IndexError : If `molecular_graphs` is empty.
    """
    # sum up all node feature vectors to get an un-normalized histogram
    first_features = molecular_graphs[0].node_features
    if isinstance(first_features, torch.Tensor):
        # allocate on the device the data already lives on instead of assuming
        # CUDA, so that CPU-only hosts and CPU tensors work as well
        nodes_hist = torch.zeros(C.n_node_features,
                                 device=first_features.device)
    else:
        nodes_hist = np.zeros(C.n_node_features)

    # loop over all the node feature matrices of the input graphs
    for molecular_graph in molecular_graphs:
        features = molecular_graph.node_features
        if isinstance(features, torch.Tensor):
            nodes_hist += torch.sum(features, dim=0)
        else:
            nodes_hist += np.sum(features, axis=0)

    idc = util.get_feature_vector_indices()  # **note: "idc" == "indices"

    # the implicit-H segment is only part of the feature vector when Hs are
    # neither explicit nor ignored
    has_imp_h_segment = not C.use_explicit_H and not C.ignore_H

    # split up `nodes_hist` into atom types hist, formal charge hist, etc;
    # the atom type and formal charge histograms are always sliced out
    atom_type_histogram = nodes_hist[:idc[0]]
    formal_charge_histogram = nodes_hist[idc[0]:idc[1]]

    if has_imp_h_segment:
        numh_histogram = nodes_hist[idc[1]:idc[2]]
    else:
        numh_histogram = [0] * C.n_imp_H

    if C.use_chirality:
        # the chirality segment comes one segment later when the implicit-H
        # segment is present
        correction = int(has_imp_h_segment)
        chirality_histogram = nodes_hist[idc[1 + correction]:
                                         idc[2 + correction]]
    else:
        chirality_histogram = [0] * C.n_chirality

    return (atom_type_histogram, formal_charge_histogram, numh_histogram,
            chirality_histogram)
Example #3
0
        def _get_node_feature_distribution(molecular_graphs : list) -> \
                                           Tuple[Union[torch.Tensor, np.ndarray], ...]:
            """
            Sums the node feature matrices of all `molecular_graphs` into one un-normalized
            histogram and splits it into four parts: atom types, formal charges, number of
            implicit Hs, and chiral states. The nth item of each part is the count of the nth
            property in `atom_types`, `formal_charge`, `imp_H`, and `chirality`.

            The atom type and formal charge parts are always slices of the summed vector. The
            implicit-H and chirality parts are slices only when the corresponding feature is
            enabled in `constants`; otherwise they are plain `list`s of zeros of length
            `constants.n_imp_H` and `constants.n_chirality`.
            """
            # the accumulator type follows the first graph's feature matrix
            if type(molecular_graphs[0].node_features) is torch.Tensor:
                totals = torch.zeros(constants.n_node_features,
                                     device=constants.device)
            else:
                totals = np.zeros(constants.n_node_features)

            # accumulate the column sums of every graph's node feature matrix
            for graph in molecular_graphs:
                try:
                    totals += torch.sum(graph.node_features, dim=0)
                except TypeError:
                    # fall back to NumPy when the torch path raises `TypeError`
                    totals += np.sum(graph.node_features, axis=0)

            # segment boundaries within the feature vector
            bounds = util.get_feature_vector_indices()

            atom_type_histogram = totals[:bounds[0]]
            formal_charge_histogram = totals[bounds[0]:bounds[1]]

            # the implicit-H segment exists only if Hs are neither explicit nor ignored
            has_imp_h_segment = not (constants.use_explicit_H
                                     or constants.ignore_H)

            numh_histogram = (totals[bounds[1]:bounds[2]]
                              if has_imp_h_segment
                              else [0] * constants.n_imp_H)

            if constants.use_chirality:
                # chirality sits one segment later when the implicit-H segment is present
                first_bound = 1 + int(has_imp_h_segment)
                chirality_histogram = totals[
                    bounds[first_bound]:bounds[first_bound + 1]]
            else:
                chirality_histogram = [0] * constants.n_chirality

            return (atom_type_histogram, formal_charge_histogram,
                    numh_histogram, chirality_histogram)