Esempio n. 1
0
    def test_get_atom_is_in_aromatic_one_hot(self):
        """Check `get_atom_is_in_aromatic_one_hot` on the first atom of two fixtures.

        The first atom of `self.mol` is a carbon expected to encode as
        `[0.0]`; the first atom of `self.mol_benzene` is a carbon expected
        to encode as `[1.0]`.
        """
        # Fixture 1: carbon that should be reported as not aromatic.
        first_atom = self.mol.GetAtoms()[0]
        assert first_atom.GetSymbol() == "C"
        assert get_atom_is_in_aromatic_one_hot(first_atom) == [0.0]

        # Fixture 2: benzene carbon that should be reported as aromatic.
        benzene_atom = self.mol_benzene.GetAtoms()[0]
        assert benzene_atom.GetSymbol() == "C"
        assert get_atom_is_in_aromatic_one_hot(benzene_atom) == [1.0]
    def _pagtn_atom_featurizer(self, atom: RDKitAtom) -> np.ndarray:
        """Calculate atom features from an RDKit atom object.

        Parameters
        ----------
        atom: rdkit.Chem.rdchem.Atom
          RDKit atom object.

        Returns
        -------
        atom_feat: np.ndarray
          numpy vector of atom features. It is the concatenation, in this
          order, of: atom type (over `self.SYMBOLS`), formal charge, total
          degree (candidates 0..10), explicit valence (candidates 0..6),
          implicit valence (candidates 0..5) and aromaticity.
        """
        # NOTE(review): the positional `False` passed to the helpers is
        # presumably `include_unknown_set` -- confirm against the helper
        # signatures.
        feature_blocks = [
            get_atom_type_one_hot(atom, self.SYMBOLS, False),
            get_atom_formal_charge_one_hot(atom, include_unknown_set=False),
            get_atom_total_degree_one_hot(atom, list(range(11)), False),
            get_atom_explicit_valence_one_hot(atom, list(range(7)), False),
            get_atom_implicit_valence_one_hot(atom, list(range(6)), False),
            get_atom_is_in_aromatic_one_hot(atom),
        ]
        return np.concatenate(feature_blocks)
def _construct_atom_feature(atom: RDKitAtom,
                            h_bond_infos: List[Tuple[int, str]],
                            sssr: List[Sequence]) -> List[float]:
    """Construct an atom feature from a RDKit atom object.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
      RDKit atom object
    h_bond_infos: List[Tuple[int, str]]
      A list of tuple `(atom_index, hydrogen_bonding_type)`.
      Basically, it is expected that this value is the return value of
      `construct_hydrogen_bonding_info`. The `hydrogen_bonding_type`
      value is "Acceptor" or "Donor".
    sssr: List[Sequence]
      The return value of `Chem.GetSymmSSSR(mol)`.
      The value is a sequence of rings.

    Returns
    -------
    List[float]
      The atom feature vector. It joins, in this order: atom type,
      chirality, formal charge, partial charge, ring size, hybridization,
      hydrogen-bonding acceptor/donor, aromaticity, total degree and
      total number of hydrogens.
    """
    type_feat = get_atom_type_one_hot(atom)
    chirality_feat = get_atom_chirality_one_hot(atom)
    formal_charge_feat = get_atom_formal_charge(atom)
    partial_charge_feat = get_atom_partial_charge(atom)
    ring_size_feat = get_atom_ring_size_one_hot(atom, sssr)
    hybridization_feat = get_atom_hybridization_one_hot(atom)
    h_bonding_feat = get_atom_hydrogen_bonding_one_hot(atom, h_bond_infos)
    aromatic_feat = get_atom_is_in_aromatic_one_hot(atom)
    degree_feat = get_atom_total_degree_one_hot(atom)
    num_hs_feat = get_atom_total_num_Hs_one_hot(atom)

    # The pieces are joined with `+`; given the List[float] return
    # annotation the helpers presumably return plain lists, so this is
    # list concatenation.
    return (type_feat + chirality_feat + formal_charge_feat +
            partial_charge_feat + ring_size_feat + hybridization_feat +
            h_bonding_feat + aromatic_feat + degree_feat + num_hs_feat)
def _construct_atom_feature(atom: RDKitAtom,
                            h_bond_infos: List[Tuple[int, str]],
                            use_chirality: bool,
                            use_partial_charge: bool) -> np.ndarray:
    """Construct an atom feature from a RDKit atom object.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
      RDKit atom object
    h_bond_infos: List[Tuple[int, str]]
      A list of tuple `(atom_index, hydrogen_bonding_type)`.
      Basically, it is expected that this value is the return value of
      `construct_hydrogen_bonding_info`. The `hydrogen_bonding_type`
      value is "Acceptor" or "Donor".
    use_chirality: bool
      Whether to use chirality information or not.
    use_partial_charge: bool
      Whether to use partial charge data or not.

    Returns
    -------
    np.ndarray
      The atom feature vector. The base part concatenates, in this order:
      atom type, formal charge, hybridization, hydrogen-bonding
      acceptor/donor, aromaticity, total degree and total number of
      hydrogens. Chirality and then partial charge are appended when the
      corresponding flag is set.
    """
    # Features that are always present, in their final order.
    base_parts = [
        get_atom_type_one_hot(atom),
        get_atom_formal_charge(atom),
        get_atom_hybridization_one_hot(atom),
        get_atom_hydrogen_bonding_one_hot(atom, h_bond_infos),
        get_atom_is_in_aromatic_one_hot(atom),
        get_atom_total_degree_one_hot(atom),
        get_atom_total_num_Hs_one_hot(atom),
    ]
    atom_feat = np.concatenate(base_parts)

    # Optional features are appended after the base block.
    if use_chirality:
        atom_feat = np.concatenate(
            [atom_feat, get_atom_chirality_one_hot(atom)])

    if use_partial_charge:
        atom_feat = np.concatenate([atom_feat, get_atom_partial_charge(atom)])

    return atom_feat
Esempio n. 5
0
def _construct_atom_feature(atom: RDKitAtom,
                            h_bond_infos: List[Tuple[int, str]],
                            use_chirality: bool,
                            use_partial_charge: bool) -> np.ndarray:
    """Construct an atom feature from a RDKit atom object.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
      RDKit atom object
    h_bond_infos: List[Tuple[int, str]]
      A list of tuple `(atom_index, hydrogen_bonding_type)`.
      Basically, it is expected that this value is the return value of
      `construct_hydrogen_bonding_info`. The `hydrogen_bonding_type`
      value is "Acceptor" or "Donor".
    use_chirality: bool
      Whether to use chirality information or not.
    use_partial_charge: bool
      Whether to use partial charge data or not.

    Returns
    -------
    np.ndarray
      The atom feature vector. It concatenates, in this order: atom type,
      formal charge, hybridization, hydrogen-bonding acceptor/donor,
      aromaticity, total degree, total number of hydrogens, implicit
      valence, explicit valence, the `_ChiralityPossible` property flag
      and the number of radical electrons. Chirality and then partial
      charge are appended when the corresponding flag is set.

      Original author's size note:
      44+1+5+2+1+12+6+8+7+1+1+2+1 = 91 features
      (presumably counted with both optional flags enabled; the sizes of
      the candidate sets are not visible here -- TODO confirm).
    """
    # Base descriptors, always present.
    base_parts = [
        get_atom_type_one_hot(atom,
                              USER_ATOM_TYPE_SET,
                              include_unknown_set=True),
        get_atom_formal_charge(atom),
        get_atom_hybridization_one_hot(atom,
                                       USER_HYBRIDIZATION_SET,
                                       include_unknown_set=False),
        get_atom_hydrogen_bonding_one_hot(atom, h_bond_infos),
        get_atom_is_in_aromatic_one_hot(atom),
        get_atom_total_degree_one_hot(atom,
                                      USER_TOTAL_DEGREE_SET,
                                      include_unknown_set=True),
        get_atom_total_num_Hs_one_hot(atom,
                                      DEFAULT_TOTAL_NUM_Hs_SET,
                                      include_unknown_set=True),
    ]
    atom_feat = np.concatenate(base_parts)

    # User additional features: valence encodings plus two raw atom values.
    implicit_valence = get_atom_implicit_valence_one_hot(
        atom, DEFAULT_ATOM_IMPLICIT_VALENCE_SET, include_unknown_set=True)
    explicit_valence = get_atom_explicit_valence_one_hot(
        atom, DEFAULT_ATOM_EXPLICIT_VALENCE_SET, include_unknown_set=True)
    raw_atom_values = [
        atom.HasProp('_ChiralityPossible'),
        atom.GetNumRadicalElectrons(),
    ]
    atom_feat = np.concatenate(
        [atom_feat, implicit_valence, explicit_valence, raw_atom_values])

    # Optional features are appended last.
    if use_chirality:
        chirality_feat = np.array(get_atom_chirality_one_hot(atom))
        atom_feat = np.concatenate([atom_feat, chirality_feat])

    if use_partial_charge:
        partial_charge_feat = np.array(get_atom_partial_charge(atom))
        atom_feat = np.concatenate([atom_feat, partial_charge_feat])

    return atom_feat