def atom_features(atom: Chem.rdchem.Atom, args) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    :param atom: An RDKit atom.
    :param args: Options object. Its ``chiral_features`` and ``global_chiral_features``
                 attributes switch the optional chirality encodings on or off.
    :return: A list containing the atom features.
    """
    # NOTE(review): the element *symbol* is matched against ATOM_FEATURES['atomic_num'];
    # confirm that table stores symbols in this variant.
    features = []
    features += onek_encoding_unk(atom.GetSymbol(), ATOM_FEATURES['atomic_num'])
    features += onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
    features += onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
    features += onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
    features += onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization'])
    features.append(1 if atom.GetIsAromatic() else 0)
    # Mass is multiplied by 0.01 to bring it to about the same range as the other features.
    features.append(atom.GetMass() * 0.01)

    if args.chiral_features:
        features += onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag'])

    if args.global_chiral_features:
        # Atoms without a '_CIPCode' property are encoded with None as the value.
        cip_code = atom.GetProp('_CIPCode') if atom.HasProp('_CIPCode') else None
        features += onek_encoding_unk(cip_code, ATOM_FEATURES['global_chiral_tag'])

    return features
def atom_features(self, atom: Chem.rdchem.Atom) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    Besides the per-atom encodings, the vector contains membership flags read
    from ``self.hydrogen_acceptor_match``, ``self.hydrogen_donor_match``,
    ``self.acidic_match`` and ``self.basic_match``, plus ring-size flags
    queried from ``self.ring_info``.

    :param atom: An RDKit atom.
    :return: A list containing the atom features.
    """
    features = []
    features.extend(onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num']))
    features.extend(onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree']))
    features.extend(onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge']))
    features.extend(onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag']))
    features.extend(onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs']))
    features.extend(onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization']))
    features.append(1 if atom.GetIsAromatic() else 0)
    features.append(atom.GetMass() * 0.01)

    idx = atom.GetIdx()
    features.extend(onek_encoding_unk(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6]))

    # One boolean per match collection: is this atom's index contained in it?
    features.append(idx in self.hydrogen_acceptor_match)
    features.append(idx in self.hydrogen_donor_match)
    features.append(idx in self.acidic_match)
    features.append(idx in self.basic_match)

    # One flag per ring size 3..8.
    features.extend(self.ring_info.IsAtomInRingOfSize(idx, size) for size in range(3, 9))
    return features
def atom_features(
        atom: Chem.rdchem.Atom,
        functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    The vector holds the encoded atomic number, total degree and total
    hydrogen count, the scaled mass, and one ring-membership flag for each
    ring size from 3 to 10.

    :param atom: An RDKit atom.
    :param functional_groups: A k-hot vector indicating the functional groups the atom belongs to.
                              Appended to the end of the vector when it is not None.
    :return: A list containing the atom features.
    """
    features = []
    features.extend(onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num']))
    features.extend(onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree']))
    features.extend(onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs']))
    # Mass is scaled to about the same range as the other features.
    features.append(atom.GetMass() * 0.01)

    # Ring-membership flags for ring sizes 3 through 10, in ascending order.
    features.extend([atom.IsInRingSize(ring_size) for ring_size in range(3, 11)])

    if functional_groups is not None:
        features += functional_groups
    return features
def getAtomFeatures(a: Chem.rdchem.Atom):
    """
    Collect raw (un-encoded) descriptors of an atom into a flat list.

    :param a: An RDKit atom.
    :return: A list with, in order: atomic number, total valence, formal charge,
             degree, total degree minus degree, number of radical electrons,
             aromaticity as 0/1, mass, and the van der Waals radius looked up
             in the RDKit periodic table.
    """
    atomic_num = a.GetAtomicNum()
    total_valence = a.GetTotalValence()
    formal_charge = a.GetFormalCharge()
    degree = a.GetDegree()
    # presumably the number of attached hydrogens not counted in GetDegree() - TODO confirm
    degree_gap = a.GetTotalDegree() - degree
    radical_electrons = a.GetNumRadicalElectrons()
    aromatic_flag = int(a.GetIsAromatic())
    mass = a.GetMass()
    vdw_radius = Chem.GetPeriodicTable().GetRvdw(atomic_num)

    # a.GetIdx() is not part of the vector (it is disabled in the original list).
    return [
        atomic_num,
        total_valence,
        formal_charge,
        degree,
        degree_gap,
        radical_electrons,
        aromatic_flag,
        mass,
        vdw_radius,
    ]
def atom_features(atom: Chem.rdchem.Atom, functional_groups: List[int] = None) \
        -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    :param atom: An RDKit atom.
    :param functional_groups: A k-hot vector indicating the functional groups the atom belongs to.
                              Appended to the end of the vector when it is not None.
    :return: A list containing the atom features.
    """
    features = onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num']) + \
        onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree']) + \
        onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge']) + \
        onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag']) + \
        onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs']) + \
        onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization']) + \
        [1 if atom.GetIsAromatic() else 0] + \
        [atom.GetMass() * 0.01]  # scaled to about same range as other features

    # Test against None explicitly instead of relying on truthiness: an empty
    # list is still a no-op, but array-like inputs (whose truth value is
    # ambiguous) no longer raise.
    if functional_groups is not None:
        features += functional_groups
    return features
def atom_features(
        atom: Chem.rdchem.Atom,
        functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    :param atom: An RDKit atom.
    :param functional_groups: A k-hot vector indicating the functional groups the atom belongs to.
                              Appended to the end of the vector when it is not None.
    :return: A list containing the atom features.
    """
    features = []
    features += onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES["atomic_num"])
    features += onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES["degree"])
    features += onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES["formal_charge"])
    features += onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES["chiral_tag"])
    features += onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES["num_Hs"])
    features += onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES["hybridization"])

    aromatic_flag = 1 if atom.GetIsAromatic() else 0
    # Mass is scaled to about the same range as the other features.
    scaled_mass = atom.GetMass() * 0.01
    features += [aromatic_flag, scaled_mass]

    if functional_groups is not None:
        features += functional_groups
    return features
def parity_features(atom: Chem.rdchem.Atom) -> int:
    """
    Looks up the parity of an atom from its chiral tag.

    The value comes straight from the ``CHIRALTAG_PARITY`` table, which is
    expected to map CW to +1, CCW to -1 and undefined/unknown to 0
    (table not shown here - verify against its definition).

    :param atom: An RDKit atom.
    :return: The parity stored in ``CHIRALTAG_PARITY`` for the atom's chiral tag.
    """
    chiral_tag = atom.GetChiralTag()
    return CHIRALTAG_PARITY[chiral_tag]
def is_transition_metal(at: Chem.rdchem.Atom) -> bool:
    """Check if atom is a transition metal.

    Args:
        at: an atom.

    Returns:
        True when the atomic number lies in one of the inclusive ranges
        22-29, 40-47 or 72-79; False otherwise.
    """
    # Inclusive (low, high) atomic-number ranges treated as transition metals.
    metal_ranges = ((22, 29), (40, 47), (72, 79))
    atomic_num = at.GetAtomicNum()
    return any(low <= atomic_num <= high for low, high in metal_ranges)
def atom_features(atom: Chem.rdchem.Atom, functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    :param atom: An RDKit atom.
    :param functional_groups: Optional list appended to the end of the vector when it is not None.
    :return: A list containing the atom features.
    """
    features = []
    features.extend(onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num']))
    features.extend(onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree']))
    features.extend(onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge']))
    features.extend(onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag']))
    features.extend(onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs']))
    features.extend(onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization']))
    features.append(1 if atom.GetIsAromatic() else 0)
    # Mass is scaled to about the same range as the other features.
    features.append(atom.GetMass() * 0.01)

    if functional_groups is not None:
        features += functional_groups
    return features
def atom_features(
        atom: Chem.rdchem.Atom,
        mol: Chem.rdchem.Mol = None,
        functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    The vector holds the encoded atomic number, total degree, total hydrogen
    count and chiral tag, a 0/1 ring-membership flag, the output of
    ``atom_in_member_rings`` and the scaled mass.

    :param atom: An RDKit atom.
    :param mol: Accepted for interface compatibility; the active code does not read it.
    :param functional_groups: A k-hot vector indicating the functional groups the atom belongs to.
                              Appended to the end of the vector when it is not None.
    :return: A list containing the atom features.
    """
    features = []
    features += onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num'])
    features += onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
    features += onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
    features += onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag'])
    features.append(1 if atom.IsInRing() else 0)
    features += atom_in_member_rings(atom)
    # Mass is scaled to about the same range as the other features.
    features.append(atom.GetMass() * 0.01)

    if functional_groups is not None:
        features += functional_groups

    return features
def get_atom_features(atom: Chem.rdchem.Atom,
                      hydrogen_acceptor_match,
                      hydrogen_donor_match,
                      acidic_match,
                      basic_match,
                      ring_info) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    :param atom: An RDKit atom.
    :param hydrogen_acceptor_match: Collection tested for membership of the atom's index.
    :param hydrogen_donor_match: Collection tested for membership of the atom's index.
    :param acidic_match: Collection tested for membership of the atom's index.
    :param basic_match: Collection tested for membership of the atom's index.
    :param ring_info: Object queried with ``IsAtomInRingOfSize(atom_idx, size)`` for sizes 3..8.
    :return: A list containing the atom features.
    """
    atomic_num_part = one_hot_vector(atom.GetAtomicNum() - 1, list(range(100)), extra_category=True)
    degree_part = one_hot_vector(atom.GetTotalDegree(), [0, 1, 2, 3, 4, 5], extra_category=True)
    charge_part = one_hot_vector(atom.GetFormalCharge(), [-1, -2, 1, 2, 0], extra_category=True)
    chiral_part = one_hot_vector(int(atom.GetChiralTag()), [0, 1, 2, 3], extra_category=True)
    hydrogen_part = one_hot_vector(int(atom.GetTotalNumHs()), [0, 1, 2, 3, 4], extra_category=True)
    hybrid_part = one_hot_vector(int(atom.GetHybridization()), [2, 3, 4, 5, 6], extra_category=True)
    aromatic_flag = 1 if atom.GetIsAromatic() else 0
    scaled_mass = atom.GetMass() * 0.01
    valence_part = one_hot_vector(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6], extra_category=True)

    features = []
    for part in (atomic_num_part, degree_part, charge_part,
                 chiral_part, hydrogen_part, hybrid_part):
        features += part
    features += [aromatic_flag, scaled_mass]
    features += valence_part

    idx = atom.GetIdx()
    # One boolean per match collection: is this atom's index contained in it?
    features += [
        idx in hydrogen_acceptor_match,
        idx in hydrogen_donor_match,
        idx in acidic_match,
        idx in basic_match,
    ]
    # One flag per ring size 3..8.
    features += [ring_info.IsAtomInRingOfSize(idx, size) for size in range(3, 9)]
    return features
def atom_features_zeros(
        atom: Chem.rdchem.Atom) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom containing only the atom number information.

    :param atom: An RDKit atom, or None to get an all-zero vector of length ``PARAMS.ATOM_FDIM``.
    :return: A list containing the atom features.
    """
    if atom is None:
        return [0] * PARAMS.ATOM_FDIM

    atomic_num_part = onek_encoding_unk(atom.GetAtomicNum() - 1, PARAMS.ATOM_FEATURES['atomic_num'])
    # Every other feature slot is set to zero.
    # assumes the atomic-number encoding has MAX_ATOMIC_NUM + 1 entries - TODO confirm
    zero_padding = [0] * (PARAMS.ATOM_FDIM - PARAMS.MAX_ATOMIC_NUM - 1)
    return atomic_num_part + zero_padding
def atom_in_member_rings(atom: Chem.rdchem.Atom):
    """
    Encode whether the atom belongs to rings of size 3, 4, 5, 6, 7 and 8.

    :param atom: An RDKit atom.
    :return: A list of seven 0/1 entries: one flag per ring size 3..8, followed
             by a final entry that is 1 only when none of those flags is set.
    """
    flags = [1 if atom.IsInRingSize(ring_size) else 0 for ring_size in range(3, 9)]
    # Trailing slot marks "not in any ring of size 3..8".
    flags.append(0 if any(flags) else 1)
    return flags
def atom_features(atom: Chem.rdchem.Atom) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    Order of the entries: encoded element symbol, aromaticity flag, encoded
    total degree, formal charge and total hydrogen count, then the scaled mass.

    :param atom: An RDKit atom.
    :return: A list containing the atom features.
    """
    # NOTE(review): the element *symbol* is matched against ATOM_FEATURES['atomic_num'];
    # confirm that table stores symbols in this variant.
    features = []
    features.extend(onek_encoding_unk(atom.GetSymbol(), ATOM_FEATURES['atomic_num']))
    features.append(1 if atom.GetIsAromatic() else 0)
    features.extend(onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree']))
    features.extend(onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge']))
    features.extend(onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs']))
    # Mass is scaled to about the same range as the other features.
    features.append(atom.GetMass() * 0.01)
    return features
def get_atom_features(atom: Chem.rdchem.Atom) -> np.ndarray:
    """Concatenate all atom features and return them as one numpy array.

    Order of the segments: element, degree, total hydrogen count, implicit
    valence, formal charge, aromaticity flag. Each categorical segment is
    produced by ``one_of_k`` (presumably a boolean one-hot list - verify
    against its definition).
    """
    # Elements from Duvenaud's original code. Note the absence of H (optional).
    element_choices = [
        'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na',
        'Ca', 'Fe', 'As', 'Al', 'I', 'B', 'V', 'K', 'Tl', 'Yb',
        'Sb', 'Sn', 'Ag', 'Pd', 'Co', 'Se', 'Ti', 'Zn', 'H',
        'Li', 'Ge', 'Cu', 'Au', 'Ni', 'Cd', 'In', 'Mn', 'Zr',
        'Cr', 'Pt', 'Hg', 'Pb', 'Unknown',
    ]

    element_bits = one_of_k(atom.GetSymbol(), element_choices)
    degree_bits = one_of_k(atom.GetDegree(), [0, 1, 2, 3, 4, 5])
    hydrogen_bits = one_of_k(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])
    charge_bits = one_of_k(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
    valence_bits = one_of_k(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5])

    # Implicit valence is placed before formal charge in the output.
    combined = []
    for segment in (element_bits, degree_bits, hydrogen_bits, valence_bits, charge_bits):
        combined += segment
    combined.append(atom.GetIsAromatic())
    return np.array(combined)
def copy_atom(atom: Chem.rdchem.Atom) -> Chem.rdchem.Atom:
    """
    Create a new atom from an existing one.

    Only the element symbol, the formal charge and the atom-map number are
    carried over to the new atom.

    :param atom: The RDKit atom to copy from.
    :return: A newly constructed ``Chem.Atom``.
    """
    symbol = atom.GetSymbol()
    duplicate = Chem.Atom(symbol)

    formal_charge = atom.GetFormalCharge()
    duplicate.SetFormalCharge(formal_charge)

    map_number = atom.GetAtomMapNum()
    duplicate.SetAtomMapNum(map_number)
    return duplicate