def atom_features(self, atom: RDKitAtom) -> np.ndarray:
    """Build the MAT-specific feature vector for a single atom.

    Deepchem already ships an atom_features function, but MAT needs
    features that one does not provide (neighbor count via GetNeighbors and
    ring membership via IsInRing) and only a fraction of what it computes,
    so a dedicated and cheaper implementation is defined here.

    The vector is the concatenation, in this order, of:

    1. one-hot encoding of the atomic number over
       B, C, N, O, F, P, S, Cl, Br, I (5, 6, 7, 8, 9, 15, 16, 17, 35, 53)
       plus the value 999 (presumably a placeholder for a dummy/other
       atom -- confirm against the caller),
    2. one-hot encoding of the number of neighbors over 0..5,
    3. one-hot encoding of the total number of hydrogens over 0..4,
    4. one-hot encoding of the formal charge over -5..5,
    5. the ring-membership flag,
    6. the aromaticity flag.

    Parameters
    ----------
    atom: RDKitAtom
        RDKit Atom object.

    Returns
    -------
    np.ndarray
        Numpy float32 array containing the atom features; the two trailing
        flags are cast to float32 together with the one-hot entries.
    """
    # Allowable value sets for each one-hot encoded property.
    atomic_num_choices = [5, 6, 7, 8, 9, 15, 16, 17, 35, 53, 999]
    neighbor_count_choices = [0, 1, 2, 3, 4, 5]
    hydrogen_count_choices = [0, 1, 2, 3, 4]
    formal_charge_choices = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]

    # Order matters: downstream consumers index into this vector by position.
    features = [
        *one_hot_encode(atom.GetAtomicNum(), atomic_num_choices),
        *one_hot_encode(len(atom.GetNeighbors()), neighbor_count_choices),
        *one_hot_encode(atom.GetTotalNumHs(), hydrogen_count_choices),
        *one_hot_encode(atom.GetFormalCharge(), formal_charge_choices),
        atom.IsInRing(),
        atom.GetIsAromatic(),
    ]
    return np.array(features, dtype=np.float32)
def get_atom_formal_charge(atom: RDKitAtom) -> List[float]:
    """Return the formal charge of an atom as a one-element feature vector.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
        RDKit atom object

    Returns
    -------
    List[float]
        A single-element list holding the atom's formal charge as a float.
    """
    formal_charge = float(atom.GetFormalCharge())
    return [formal_charge]
def get_atom_formal_charge_one_hot(
        atom: RDKitAtom,
        allowable_set: List[int] = DEFAULT_FORMAL_CHARGE_SET,
        include_unknown_set: bool = True) -> List[float]:
    """Return a one-hot encoding of an atom's formal charge.

    Parameters
    ----------
    atom: rdkit.Chem.rdchem.Atom
        RDKit atom object
    allowable_set: List[int]
        The formal charges to consider. Defaults to
        `DEFAULT_FORMAL_CHARGE_SET` (documented as `[-2, -1, ..., 2]`).
    include_unknown_set: bool, default True
        If true, the index of all values not in `allowable_set` is
        `len(allowable_set)`. Forwarded unchanged to `one_hot_encode`.

    Returns
    -------
    List[float]
        A one-hot vector of the formal charge.
    """
    formal_charge = atom.GetFormalCharge()
    return one_hot_encode(formal_charge, allowable_set, include_unknown_set)