Beispiel #1
0
    def atom_features(self,
                      atom: Chem.rdchem.Atom) -> List[Union[bool, int, float]]:
        """
        Assembles the feature vector describing a single atom.

        The vector is laid out, in order, as: one-hot atomic number, total
        degree, formal charge, chiral tag, total hydrogen count and
        hybridization; an aromaticity flag; the atomic mass multiplied by
        0.01; a one-hot implicit valence over 0..6; four flags telling whether
        the atom index occurs in the hydrogen-acceptor, hydrogen-donor, acidic
        and basic match collections stored on this object; and six flags for
        membership in rings of size 3 through 8 according to ``self.ring_info``.

        :param atom: An RDKit atom.
        :return: A list containing the atom features.
        """
        encoded = (
            onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num'])
            + onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
            + onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
            + onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag'])
            + onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
            + onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization'])
            + [1 if atom.GetIsAromatic() else 0]
            + [atom.GetMass() * 0.01]
        )

        idx = atom.GetIdx()
        valence = onek_encoding_unk(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])

        # Membership of this atom's index in the pattern matches held on self.
        match_flags = [
            idx in self.hydrogen_acceptor_match,
            idx in self.hydrogen_donor_match,
            idx in self.acidic_match,
            idx in self.basic_match,
        ]

        # One flag per ring size from 3 up to and including 8.
        ring_flags = [
            self.ring_info.IsAtomInRingOfSize(idx, ring_size)
            for ring_size in range(3, 9)
        ]

        return encoded + valence + match_flags + ring_flags
def atom_features(
        atom: Chem.rdchem.Atom,
        mol: Chem.rdchem.Mol = None,
        functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    The vector concatenates, in order: one-hot atomic number, one-hot total
    degree, one-hot total hydrogen count, one-hot chiral tag, an in-ring
    flag, the list returned by ``atom_in_member_rings(atom)`` and the atomic
    mass multiplied by 0.01. If ``functional_groups`` is given, it is appended
    at the end.

    :param atom: An RDKit atom.
    :param mol: An RDKit molecule. Currently unused by this function; the
        parameter is kept so callers that pass it keep working.
    :param functional_groups: A k-hot vector indicating the functional groups the atom belongs to.
    :return: A list containing the atom features.
    """
    features = (
        onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num'])
        + onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
        + onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
        + onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag'])
        + [1 if atom.IsInRing() else 0]
        + atom_in_member_rings(atom)
        + [atom.GetMass() * 0.01]  # scaled to about the same range as other features
    )
    if functional_groups is not None:
        features += functional_groups
    return features
Beispiel #3
0
def atom_features(atom: Chem.rdchem.Atom,
                  args) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    The base vector holds, in order, the one-hot encodings of the element,
    total degree, formal charge, total hydrogen count and hybridization,
    followed by an aromaticity flag and the atomic mass multiplied by 0.01.
    Chirality encodings are appended only when the matching switch on
    ``args`` is truthy.

    :param atom: An RDKit atom.
    :param args: An object exposing ``chiral_features`` (append the one-hot
        chiral tag) and ``global_chiral_features`` (append the one-hot
        ``_CIPCode`` property, encoding ``None`` when the atom lacks it).
    :return: A list containing the atom features.
    """
    # NOTE(review): the element *symbol* is looked up in
    # ATOM_FEATURES['atomic_num']; presumably that entry lists symbols in this
    # variant -- confirm against its definition.
    features = (
        onek_encoding_unk(atom.GetSymbol(), ATOM_FEATURES['atomic_num'])
        + onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
        + onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
        + onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
        + onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization'])
        + [1 if atom.GetIsAromatic() else 0]
        + [atom.GetMass() * 0.01]  # scaled to about the same range as other features
    )

    if args.chiral_features:
        chiral_tag = int(atom.GetChiralTag())
        features += onek_encoding_unk(chiral_tag, ATOM_FEATURES['chiral_tag'])

    if args.global_chiral_features:
        # Atoms without an assigned CIP code are encoded with None.
        cip_code = atom.GetProp('_CIPCode') if atom.HasProp('_CIPCode') else None
        features += onek_encoding_unk(cip_code, ATOM_FEATURES['global_chiral_tag'])

    return features
Beispiel #4
0
def atom_features(atom: Chem.rdchem.Atom,
                  functional_groups: List[int] = None) \
        -> List[Union[bool, int, float]]:
    '''
    Builds a feature vector for an atom.

    The vector concatenates, in order, the one-hot encodings of atomic
    number, total degree, formal charge, chiral tag, total hydrogen count
    and hybridization, then an aromaticity flag and the atomic mass
    multiplied by 0.01.

    :param atom: An RDKit atom.
    :param functional_groups: A k-hot vector indicating the functional groups
    the atom belongs to. It is appended only when truthy (non-empty).
    :return: A list containing the atom features.
    '''
    features = (
        onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num'])
        + onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
        + onek_encoding_unk(atom.GetFormalCharge(),
                            ATOM_FEATURES['formal_charge'])
        + onek_encoding_unk(int(atom.GetChiralTag()),
                            ATOM_FEATURES['chiral_tag'])
        + onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
        + onek_encoding_unk(int(atom.GetHybridization()),
                            ATOM_FEATURES['hybridization'])
        + [1 if atom.GetIsAromatic() else 0]
        # scaled to about same range as other features
        + [atom.GetMass() * 0.01]
    )

    # Nothing to append when no (or an empty) functional-group vector is given.
    if not functional_groups:
        return features

    features += functional_groups
    return features
Beispiel #5
0
def atom_features(
        atom: Chem.rdchem.Atom,
        functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    Each descriptor is encoded separately and the pieces are then joined in
    this order: atomic number, total degree, formal charge, chiral tag,
    total hydrogen count, hybridization, aromaticity flag, scaled mass.

    :param atom: An RDKit atom.
    :param functional_groups: A k-hot vector indicating the functional groups the atom belongs to.
    :return: A list containing the atom features.
    """
    atomic_num = onek_encoding_unk(atom.GetAtomicNum() - 1,
                                   ATOM_FEATURES["atomic_num"])
    degree = onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES["degree"])
    formal_charge = onek_encoding_unk(atom.GetFormalCharge(),
                                      ATOM_FEATURES["formal_charge"])
    chiral_tag = onek_encoding_unk(int(atom.GetChiralTag()),
                                   ATOM_FEATURES["chiral_tag"])
    num_hs = onek_encoding_unk(int(atom.GetTotalNumHs()),
                               ATOM_FEATURES["num_Hs"])
    hybridization = onek_encoding_unk(int(atom.GetHybridization()),
                                      ATOM_FEATURES["hybridization"])
    aromatic = [1 if atom.GetIsAromatic() else 0]
    # scaled to about the same range as other features
    scaled_mass = [atom.GetMass() * 0.01]

    features = (atomic_num + degree + formal_charge + chiral_tag + num_hs
                + hybridization + aromatic + scaled_mass)
    if functional_groups is not None:
        features += functional_groups
    return features
def atom_features(
        atom: Chem.rdchem.Atom,
        functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    The vector holds the one-hot atomic number, total degree and total
    hydrogen count, the atomic mass multiplied by 0.01, and one flag per ring
    size from 3 to 10 telling whether the atom is in a ring of that size.

    :param atom: An RDKit atom.
    :param functional_groups: A k-hot vector indicating the functional groups the atom belongs to.
    :return: A list containing the atom features.
    """
    features = (
        onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num'])
        + onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
        + onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
        + [atom.GetMass() * 0.01]  # scaled to about the same range as other features
    )

    # Ring sizes 3 through 10 inclusive, one flag each.
    features += [atom.IsInRingSize(ring_size) for ring_size in range(3, 11)]

    if functional_groups is not None:
        features += functional_groups
    return features
Beispiel #7
0
def atom_features(atom: Chem.rdchem.Atom, functional_groups: List[int] = None) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    The vector concatenates, in order, the one-hot encodings of atomic
    number, total degree, formal charge, chiral tag, total hydrogen count
    and hybridization, then an aromaticity flag and the scaled atomic mass.

    :param atom: An RDKit atom.
    :param functional_groups: Optional list appended to the end of the vector when it is not None.
    :return: A list containing the atom features.
    """
    features = (
        onek_encoding_unk(atom.GetAtomicNum() - 1, ATOM_FEATURES['atomic_num'])
        + onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
        + onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
        + onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag'])
        + onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
        + onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization'])
        + [1 if atom.GetIsAromatic() else 0]
        + [atom.GetMass() * 0.01]  # scaled to about the same range as other features
    )
    if functional_groups is None:
        return features
    features += functional_groups
    return features
Beispiel #8
0
def atom_features(atom: Chem.rdchem.Atom) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    Layout, in order: one-hot element, aromaticity flag, one-hot total
    degree, one-hot formal charge, one-hot total hydrogen count, and the
    atomic mass multiplied by 0.01.

    :param atom: An RDKit atom.
    :return: A list containing the atom features.
    """
    # NOTE(review): the element *symbol* is looked up in
    # ATOM_FEATURES['atomic_num']; presumably that entry lists symbols in this
    # variant -- confirm against its definition.
    element = onek_encoding_unk(atom.GetSymbol(), ATOM_FEATURES['atomic_num'])
    aromatic = [1 if atom.GetIsAromatic() else 0]
    degree = onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
    formal_charge = onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
    num_hs = onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
    # scaled to about the same range as other features
    scaled_mass = [atom.GetMass() * 0.01]

    return element + aromatic + degree + formal_charge + num_hs + scaled_mass
Beispiel #9
0
def get_atom_features(atom:Chem.rdchem.Atom) -> np.ndarray:
    """Concatenate all atom features and return them as a numpy array.

    The encodings are joined in this order: element symbol, degree, total
    hydrogen count, implicit valence, formal charge, aromaticity flag.

    NOTE(review): the original summary describes the result as an array of
    bools; the actual dtype depends on what ``one_of_k`` returns -- confirm.
    """
    # Element vocabulary from Duvenaud's original code. 'H' is included here,
    # and 'Unknown' is the last entry.
    element_vocabulary = [
        'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br',
        'Mg', 'Na', 'Ca', 'Fe', 'As', 'Al', 'I', 'B', 'V',
        'K', 'Tl', 'Yb', 'Sb', 'Sn', 'Ag', 'Pd', 'Co', 'Se',
        'Ti', 'Zn', 'H', 'Li', 'Ge', 'Cu', 'Au', 'Ni', 'Cd',
        'In', 'Mn', 'Zr', 'Cr', 'Pt', 'Hg', 'Pb', 'Unknown',
    ]

    element = one_of_k(atom.GetSymbol(), element_vocabulary)
    degree = one_of_k(atom.GetDegree(), list(range(6)))
    num_hs = one_of_k(atom.GetTotalNumHs(), list(range(5)))
    formal_charge = one_of_k(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
    implicit_valence = one_of_k(atom.GetImplicitValence(), list(range(6)))
    aromatic = [atom.GetIsAromatic()]

    # Implicit valence comes before formal charge in the output layout.
    return np.array(element + degree + num_hs + implicit_valence
                    + formal_charge + aromatic)
def get_atom_features(atom: Chem.rdchem.Atom, hydrogen_acceptor_match,
                      hydrogen_donor_match, acidic_match, basic_match,
                      ring_info) -> List[Union[bool, int, float]]:
    """
    Builds a feature vector for an atom.

    Layout, in order: one-hot atomic number (minus one, over 0..99), total
    degree, formal charge, chiral tag, total hydrogen count and
    hybridization (each encoded with an extra category); an aromaticity
    flag; the atomic mass multiplied by 0.01; one-hot implicit valence over
    0..6; four match-membership flags; six ring-size flags (sizes 3 to 8).

    :param atom: An RDKit atom.
    :param hydrogen_acceptor_match: Container tested for the atom's index.
    :param hydrogen_donor_match: Container tested for the atom's index.
    :param acidic_match: Container tested for the atom's index.
    :param basic_match: Container tested for the atom's index.
    :param ring_info: Object whose ``IsAtomInRingOfSize(atom_idx, size)`` is
        queried for each ring size.
    :return: A list containing the atom features.
    """
    features = []

    features.extend(one_hot_vector(atom.GetAtomicNum() - 1,
                                   list(range(100)),
                                   extra_category=True))
    features.extend(one_hot_vector(atom.GetTotalDegree(),
                                   [0, 1, 2, 3, 4, 5],
                                   extra_category=True))
    features.extend(one_hot_vector(atom.GetFormalCharge(),
                                   [-1, -2, 1, 2, 0],
                                   extra_category=True))
    features.extend(one_hot_vector(int(atom.GetChiralTag()),
                                   [0, 1, 2, 3],
                                   extra_category=True))
    features.extend(one_hot_vector(int(atom.GetTotalNumHs()),
                                   [0, 1, 2, 3, 4],
                                   extra_category=True))
    features.extend(one_hot_vector(int(atom.GetHybridization()),
                                   [2, 3, 4, 5, 6],
                                   extra_category=True))

    # Scalar features sit between the hybridization and valence encodings.
    features.append(1 if atom.GetIsAromatic() else 0)
    features.append(atom.GetMass() * 0.01)

    features.extend(one_hot_vector(atom.GetImplicitValence(),
                                   [0, 1, 2, 3, 4, 5, 6],
                                   extra_category=True))

    idx = atom.GetIdx()
    features.extend([
        idx in hydrogen_acceptor_match,
        idx in hydrogen_donor_match,
        idx in acidic_match,
        idx in basic_match,
    ])

    # One flag per ring size from 3 up to and including 8.
    features.extend(
        ring_info.IsAtomInRingOfSize(idx, ring_size)
        for ring_size in range(3, 9)
    )

    return features