Beispiel #1
0
def get_atom_explicit_valence_one_hot(
        atom: RDKitAtom,
        allowable_set: List[int] = DEFAULT_ATOM_EXPLICIT_VALENCE_SET,
        include_unknown_set: bool = True) -> List[float]:
    """One-hot encode the explicit valence of an atom.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
        RDKit atom object; its `GetExplicitValence()` value is encoded.
    allowable_set: List[int]
        Explicit valence values that each get their own slot.
        Defaults to `DEFAULT_ATOM_EXPLICIT_VALENCE_SET`.
    include_unknown_set: bool, default True
        Passed through to `one_hot_encode`. When true, values outside
        `allowable_set` are expected to map to the extra index
        `len(allowable_set)`.

    Returns
    -------
    List[float]
        The vector produced by `one_hot_encode`. Expected length is
        `len(allowable_set)` when `include_unknown_set` is False and
        `len(allowable_set) + 1` when it is True.
    """
    explicit_valence = atom.GetExplicitValence()
    return one_hot_encode(explicit_valence, allowable_set, include_unknown_set)
def get_atom_hydrogen_bonding_one_hot(
    atom: RDKitAtom, hydrogen_bonding: List[Tuple[int, str]]) -> List[float]:
  """Encode whether an atom is a hydrogen-bond donor and/or acceptor.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object; only its index (`GetIdx()`) is used.
  hydrogen_bonding: List[Tuple[int, str]]
    The return value of `construct_hydrogen_bonding_info`.
    Each entry is a tuple `(atom_index, hydrogen_bonding)` such as
    `(1, "Acceptor")`.

  Returns
  -------
  List[float]
    A two-element vector. The first element is 1.0 if the atom is listed
    as "Donor", the second is 1.0 if it is listed as "Acceptor". Both can
    be set when the atom appears with both labels; labels other than these
    two are ignored.
  """
  own_index = atom.GetIdx()
  # Collect every label recorded for this atom, then test for the two
  # labels that have a slot in the output vector.
  labels = [entry[1] for entry in hydrogen_bonding if entry[0] == own_index]
  return [float("Donor" in labels), float("Acceptor" in labels)]
Beispiel #3
0
  def atom_features(self, atom: RDKitAtom) -> np.ndarray:
    """
    Build the MAT-specific feature vector for a single atom.

    Deepchem already ships an atom_features function, but MAT needs a
    different, smaller feature set (including neighbor count and ring
    membership), so a dedicated implementation is used here.

    The vector is the concatenation, in this order, of:
    one-hot atomic number, one-hot neighbor count, one-hot total hydrogen
    count, one-hot formal charge, ring-membership flag, aromaticity flag.

    Parameters
    ----------
    atom: RDKitAtom
      RDKit Atom object.

    Returns
    -------
    ndarray
      Numpy array of dtype float32 containing the atom features.
    """
    atomic_num_bits = one_hot_encode(atom.GetAtomicNum(),
                                     [5, 6, 7, 8, 9, 15, 16, 17, 35, 53, 999])
    neighbor_bits = one_hot_encode(len(atom.GetNeighbors()), [0, 1, 2, 3, 4, 5])
    hydrogen_bits = one_hot_encode(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])
    charge_bits = one_hot_encode(atom.GetFormalCharge(),
                                 [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])

    # The two trailing flags are converted to 0.0 / 1.0 by the float32 cast.
    features = [
        *atomic_num_bits,
        *neighbor_bits,
        *hydrogen_bits,
        *charge_bits,
        atom.IsInRing(),
        atom.GetIsAromatic(),
    ]
    return np.array(features, dtype=np.float32)
Beispiel #4
0
def get_atom_ring_size_one_hot(
        atom: RDKitAtom,
        sssr: Sequence,
        allowable_set: List[int] = DEFAULT_RING_SIZE_SET,
        include_unknown_set: bool = False) -> List[float]:
    """Encode the sizes of the rings an atom belongs to.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
        RDKit atom object.
    sssr: Sequence
        The return value of `Chem.GetSymmSSSR(mol)`: a sequence of rings,
        each ring being an iterable of atom indices.
    allowable_set: List[int]
        The ring sizes that each get their own slot. Defaults to
        `DEFAULT_RING_SIZE_SET`.
    include_unknown_set: bool, default False
        If true, one extra slot at index `len(allowable_set)` is set when
        the atom sits in a ring whose size is not in `allowable_set`.

    Returns
    -------
    List[float]
        A vector with one slot per ring size.
        If `include_unknown_set` is False, the length is `len(allowable_set)`.
        If `include_unknown_set` is True, the length is `len(allowable_set) + 1`.
        More than one slot can be 1.0 when the atom is a member of several
        rings of different sizes; all slots are 0.0 for non-ring atoms.
    """
    num_known = len(allowable_set)
    one_hot = [0.0] * (num_known + 1 if include_unknown_set else num_known)
    if not atom.IsInRing():
        return one_hot

    atom_index = atom.GetIdx()
    for ring in sssr:
        members = list(ring)
        if atom_index not in members:
            continue
        ring_size = len(members)
        if ring_size in allowable_set:
            # Index into the caller-supplied set so custom sets map correctly.
            one_hot[allowable_set.index(ring_size)] = 1.0
        elif include_unknown_set:
            one_hot[num_known] = 1.0
    return one_hot
def get_atom_formal_charge(atom: RDKitAtom) -> List[float]:
  """Return the formal charge of an atom as a one-element vector.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object

  Returns
  -------
  List[float]
    A single-element list holding the formal charge converted to float.
  """
  charge = float(atom.GetFormalCharge())
  return [charge]
def get_atom_is_in_aromatic_one_hot(atom: RDKitAtom) -> List[float]:
  """Get a one-element feature telling whether an atom is aromatic.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object

  Returns
  -------
  List[float]
    `[1.0]` if `atom.GetIsAromatic()` is true, otherwise `[0.0]`.
  """
  aromatic_flag = float(atom.GetIsAromatic())
  return [aromatic_flag]
def get_atom_partial_charge(atom: RDKitAtom) -> List[float]:
  """Get the Gasteiger partial charge of an atom.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object

  Returns
  -------
  List[float]
    A single-element list holding the partial charge. Non-finite values
    (NaN or +/- infinity, in any spelling `float()` accepts) are replaced
    by 0.0 so they cannot leak into the feature vector.

  Notes
  -----
  Before using this function, you must calculate `GasteigerCharge`
  like `AllChem.ComputeGasteigerCharges(mol)`; the value is read from the
  `_GasteigerCharge` atom property.
  """
  import math  # function-scope import keeps this block self-contained

  # Parse first, then test the number itself rather than matching a fixed
  # list of string spellings such as 'nan' / '-inf'.
  charge = float(atom.GetProp('_GasteigerCharge'))
  if not math.isfinite(charge):
    charge = 0.0
  return [charge]
def get_atom_type_one_hot(atom: RDKitAtom,
                          allowable_set: List[str] = DEFAULT_ATOM_TYPE_SET,
                          include_unknown_set: bool = True) -> List[float]:
  """One-hot encode the element symbol of an atom.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object; its `GetSymbol()` value is encoded.
  allowable_set: List[str]
    The atom symbols that each get their own slot. Defaults to
    `DEFAULT_ATOM_TYPE_SET`.
  include_unknown_set: bool, default True
    Passed through to `one_hot_encode`. When true, symbols outside
    `allowable_set` are expected to map to the extra index
    `len(allowable_set)`.

  Returns
  -------
  List[float]
    The vector produced by `one_hot_encode`. Expected length is
    `len(allowable_set)` when `include_unknown_set` is False and
    `len(allowable_set) + 1` when it is True.
  """
  symbol = atom.GetSymbol()
  return one_hot_encode(symbol, allowable_set, include_unknown_set)
Beispiel #9
0
def get_atom_formal_charge_one_hot(
        atom: RDKitAtom,
        allowable_set: List[int] = DEFAULT_FORMAL_CHARGE_SET,
        include_unknown_set: bool = True) -> List[float]:
    """One-hot encode the formal charge of an atom.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
        RDKit atom object; its `GetFormalCharge()` value is encoded.
    allowable_set: List[int]
        The formal charges that each get their own slot. Defaults to
        `DEFAULT_FORMAL_CHARGE_SET`.
    include_unknown_set: bool, default True
        Passed through to `one_hot_encode`. When true, charges outside
        `allowable_set` are expected to map to the extra index
        `len(allowable_set)`.

    Returns
    -------
    List[float]
        The one-hot vector produced by `one_hot_encode` for the formal
        charge.
    """
    formal_charge = atom.GetFormalCharge()
    return one_hot_encode(formal_charge, allowable_set, include_unknown_set)
def get_atom_total_degree_one_hot(
    atom: RDKitAtom,
    allowable_set: List[int] = DEFAULT_TOTAL_DEGREE_SET,
    include_unknown_set: bool = True) -> List[float]:
  """One-hot encode the total degree of an atom.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object; its `GetTotalDegree()` value is encoded.
  allowable_set: List[int]
    The degrees that each get their own slot. Defaults to
    `DEFAULT_TOTAL_DEGREE_SET`.
  include_unknown_set: bool, default True
    Passed through to `one_hot_encode`. When true, degrees outside
    `allowable_set` are expected to map to the extra index
    `len(allowable_set)`.

  Returns
  -------
  List[float]
    The vector produced by `one_hot_encode`. Expected length is
    `len(allowable_set)` when `include_unknown_set` is False and
    `len(allowable_set) + 1` when it is True.
  """
  total_degree = atom.GetTotalDegree()
  return one_hot_encode(total_degree, allowable_set, include_unknown_set)
def get_atom_chirality_one_hot(atom: RDKitAtom) -> List[float]:
  """Get a one-hot feature of an atom's CIP chirality label.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object; the label is read from its `_CIPCode` property.

  Returns
  -------
  List[float]
    A two-element vector. The first element is 1.0 for label "R", the
    second is 1.0 for label "S". Both are 0.0 when the atom has no
    `_CIPCode` property or the label is anything else.
  """
  try:
    cip_code = atom.GetProp('_CIPCode')
  except KeyError:
    # The property is absent when no CIP label was assigned to this atom.
    # Only that case is treated as "no chirality"; other errors propagate.
    return [0.0, 0.0]
  return [float(cip_code == "R"), float(cip_code == "S")]
def get_atom_hybridization_one_hot(
    atom: RDKitAtom,
    allowable_set: List[str] = DEFAULT_HYBRIDIZATION_SET,
    include_unknown_set: bool = False) -> List[float]:
  """One-hot encode the hybridization type of an atom.

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    RDKit atom object; the string form of `GetHybridization()` is encoded.
  allowable_set: List[str]
    The hybridization names that each get their own slot. Defaults to
    `DEFAULT_HYBRIDIZATION_SET`.
  include_unknown_set: bool, default False
    Passed through to `one_hot_encode`. When true, types outside
    `allowable_set` are expected to map to the extra index
    `len(allowable_set)`.

  Returns
  -------
  List[float]
    The vector produced by `one_hot_encode`. Expected length is
    `len(allowable_set)` when `include_unknown_set` is False and
    `len(allowable_set) + 1` when it is True.
  """
  # The hybridization value is compared by its string name.
  hybridization_name = str(atom.GetHybridization())
  return one_hot_encode(hybridization_name, allowable_set, include_unknown_set)
Beispiel #13
0
def _construct_atom_feature(atom: RDKitAtom,
                            h_bond_infos: List[Tuple[int, str]],
                            use_chirality: bool,
                            use_partial_charge: bool) -> np.ndarray:
    """Construct an atom feature vector from a RDKit atom object.

    The vector is the concatenation, in this order, of: atom type,
    formal charge, hybridization, hydrogen-bond donor/acceptor flags,
    aromaticity flag, total degree, total hydrogen count, implicit
    valence, explicit valence, the `_ChiralityPossible` property flag and
    the radical electron count, optionally followed by the chirality
    one-hot and the partial charge.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
        RDKit atom object
    h_bond_infos: List[Tuple[int, str]]
        A list of tuple `(atom_index, hydrogen_bonding_type)`.
        Basically, it is expected that this value is the return value of
        `construct_hydrogen_bonding_info`. The `hydrogen_bonding_type`
        value is "Acceptor" or "Donor".
    use_chirality: bool
        Whether to append the chirality one-hot or not.
    use_partial_charge: bool
        Whether to append the partial charge or not.

    Returns
    -------
    np.ndarray
        A one-dimensional array of the atom feature.
        NOTE(review): the original author's note gives the size as
        44+1+5+2+1+12+6+8+7+1+1+2+1 = 91 features; that depends on the
        `USER_*` / `DEFAULT_*` sets defined elsewhere -- confirm.
    """
    # Base descriptors; the list literal evaluates the helpers in order.
    pieces = [
        get_atom_type_one_hot(atom,
                              USER_ATOM_TYPE_SET,
                              include_unknown_set=True),
        get_atom_formal_charge(atom),
        get_atom_hybridization_one_hot(atom,
                                       USER_HYBRIDIZATION_SET,
                                       include_unknown_set=False),
        get_atom_hydrogen_bonding_one_hot(atom, h_bond_infos),
        get_atom_is_in_aromatic_one_hot(atom),
        get_atom_total_degree_one_hot(atom,
                                      USER_TOTAL_DEGREE_SET,
                                      include_unknown_set=True),
        get_atom_total_num_Hs_one_hot(atom,
                                      DEFAULT_TOTAL_NUM_Hs_SET,
                                      include_unknown_set=True),
    ]

    # User additional features: valences, chirality-possible flag, radicals.
    pieces.append(
        get_atom_implicit_valence_one_hot(atom,
                                          DEFAULT_ATOM_IMPLICIT_VALENCE_SET,
                                          include_unknown_set=True))
    pieces.append(
        get_atom_explicit_valence_one_hot(atom,
                                          DEFAULT_ATOM_EXPLICIT_VALENCE_SET,
                                          include_unknown_set=True))
    pieces.append(
        [atom.HasProp('_ChiralityPossible'),
         atom.GetNumRadicalElectrons()])

    # Optional trailing blocks.
    if use_chirality:
        pieces.append(np.array(get_atom_chirality_one_hot(atom)))
    if use_partial_charge:
        pieces.append(np.array(get_atom_partial_charge(atom)))

    return np.concatenate(pieces)