Exemple #1
0
    def atom_features(self,
                      atom: Chem.rdchem.Atom) -> List[Union[bool, int, float]]:
        """
        Assembles the feature list for a single atom.

        Besides values read from the atom itself, the result includes whether the atom's index is
        contained in ``self.hydrogen_acceptor_match``, ``self.hydrogen_donor_match``,
        ``self.acidic_match`` and ``self.basic_match``, plus ring-size flags from ``self.ring_info``.

        :param atom: An RDKit atom.
        :return: A list containing the atom features, always concatenated in the same order.
        """
        # Categorical properties, encoded against the choices stored in ATOM_FEATURES.
        # NOTE(review): onek_encoding_unk is defined outside this block; presumably a one-hot
        # encoding with an extra "unknown" slot -- confirm against the helper.
        atomic_num_enc = onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num'])
        degree_enc = onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
        charge_enc = onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
        chiral_enc = onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag'])
        num_hs_enc = onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
        hybrid_enc = onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization'])

        # Scalar properties: aromaticity as 0/1 and the atom mass scaled by 0.01.
        aromatic_flag = [1 if atom.GetIsAromatic() else 0]
        scaled_mass = [atom.GetMass() * 0.01]

        atom_idx = atom.GetIdx()
        valence_enc = onek_encoding_unk(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])

        # Membership of this atom's index in the match collections held on the instance.
        match_flags = [atom_idx in self.hydrogen_acceptor_match,
                       atom_idx in self.hydrogen_donor_match,
                       atom_idx in self.acidic_match,
                       atom_idx in self.basic_match]

        # One flag per ring size from 3 through 8.
        ring_flags = [self.ring_info.IsAtomInRingOfSize(atom_idx, size)
                      for size in range(3, 9)]

        return (atomic_num_enc + degree_enc + charge_enc + chiral_enc + num_hs_enc +
                hybrid_enc + aromatic_flag + scaled_mass + valence_enc +
                match_flags + ring_flags)
Exemple #2
0
def get_atom_features(atom: Chem.rdchem.Atom) -> np.ndarray:
    """Concatenate all per-atom encodings and return them as one numpy array.

    The array holds, in order: element symbol, degree, total hydrogen count,
    implicit valence, formal charge (each encoded by ``one_of_k``) and finally
    the aromaticity flag.

    NOTE(review): the element dtype depends on what ``one_of_k`` returns
    (presumably booleans) -- confirm against the helper.
    """
    # Element vocabulary; the original note attributes it to Duvenaud's code.
    # 'H' is part of this list and 'Unknown' is its final entry.
    element_choices = [
        'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br',
        'Mg', 'Na', 'Ca', 'Fe', 'As', 'Al', 'I', 'B', 'V',
        'K', 'Tl', 'Yb', 'Sb', 'Sn', 'Ag', 'Pd', 'Co', 'Se',
        'Ti', 'Zn', 'H', 'Li', 'Ge', 'Cu', 'Au', 'Ni', 'Cd',
        'In', 'Mn', 'Zr', 'Cr', 'Pt', 'Hg', 'Pb', 'Unknown',
    ]

    element_part = one_of_k(atom.GetSymbol(), element_choices)
    degree_part = one_of_k(atom.GetDegree(), [0, 1, 2, 3, 4, 5])
    hydrogen_part = one_of_k(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])
    charge_part = one_of_k(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
    valence_part = one_of_k(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5])

    # The output order places implicit valence before formal charge, which
    # differs from the order in which the parts were computed above.
    combined = (element_part + degree_part + hydrogen_part +
                valence_part + charge_part + [atom.GetIsAromatic()])
    return np.array(combined)
def get_atom_features(atom: Chem.rdchem.Atom, hydrogen_acceptor_match,
                      hydrogen_donor_match, acidic_match, basic_match,
                      ring_info) -> List[Union[bool, int, float]]:
    """Build the feature list for a single atom.

    :param atom: An RDKit atom.
    :param hydrogen_acceptor_match: Container tested for the atom's index.
    :param hydrogen_donor_match: Container tested for the atom's index.
    :param acidic_match: Container tested for the atom's index.
    :param basic_match: Container tested for the atom's index.
    :param ring_info: Object queried via ``IsAtomInRingOfSize(atom_idx, size)``.
    :return: The features in a fixed order: encoded atomic number, total
        degree, formal charge, chiral tag, hydrogen count and hybridization;
        a 0/1 aromaticity flag; mass scaled by 0.01; encoded implicit
        valence; the four match flags; ring flags for sizes 3 through 8.
    """

    def encode(value, choices):
        # Every categorical feature is encoded with extra_category=True.
        # NOTE(review): one_hot_vector is defined outside this block;
        # presumably the flag adds a slot for unlisted values -- confirm.
        return one_hot_vector(value, choices, extra_category=True)

    collected = []

    collected.extend(encode(atom.GetAtomicNum() - 1, list(range(100))))
    collected.extend(encode(atom.GetTotalDegree(), [0, 1, 2, 3, 4, 5]))
    collected.extend(encode(atom.GetFormalCharge(), [-1, -2, 1, 2, 0]))
    collected.extend(encode(int(atom.GetChiralTag()), [0, 1, 2, 3]))
    collected.extend(encode(int(atom.GetTotalNumHs()), [0, 1, 2, 3, 4]))
    # NOTE(review): these are raw integer values of RDKit's hybridization
    # enum; which hybridizations they denote depends on the RDKit version.
    collected.extend(encode(int(atom.GetHybridization()), [2, 3, 4, 5, 6]))

    collected.append(1 if atom.GetIsAromatic() else 0)
    collected.append(atom.GetMass() * 0.01)

    collected.extend(encode(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6]))

    atom_idx = atom.GetIdx()
    collected.append(atom_idx in hydrogen_acceptor_match)
    collected.append(atom_idx in hydrogen_donor_match)
    collected.append(atom_idx in acidic_match)
    collected.append(atom_idx in basic_match)

    # One flag per ring size from 3 through 8.
    collected.extend(
        ring_info.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
    )

    return collected