예제 #1
0
def remove_bridge(molecule: Mol, root_pattern_smiles: str,
                  removal_indices: List[int]) -> Optional[Mol]:
    """Remove selected atoms of a substructure match from a molecule.

    The pattern given by `root_pattern_smiles` is searched for in `molecule`.
    Only the first match is used. The atoms of that match selected by
    `removal_indices` are deleted, and the edited molecule is handed to
    `get_largest_fragment`.

    Params:
    -------
    molecule: Mol
        Molecule to edit. The edit is performed on an `EditableMol` built
        from it.

    root_pattern_smiles: str
        SMILES string of the pattern to locate in `molecule`.

    removal_indices: List[int]
        Positions *within the pattern match* (not atom indices of
        `molecule`) of the atoms to remove. Duplicate entries are ignored.

    Returns:
    --------
    Optional[Mol]
        `None` if the pattern does not occur in `molecule`; otherwise the
        result of `get_largest_fragment` applied to the edited molecule.
    """
    root_pattern = Chem.MolFromSmiles(root_pattern_smiles)
    matches = molecule.GetSubstructMatches(root_pattern)
    if not matches:
        return None
    match = matches[0]

    # De-duplicate the target atoms: removing the same index twice would
    # delete a second, unrelated atom that slid into the freed position.
    # Highest index first, so each removal leaves the indices that are
    # still pending untouched.
    indexes_to_delete = sorted({match[i] for i in removal_indices},
                               reverse=True)

    e_mol = Chem.EditableMol(molecule)
    for atom_idx in indexes_to_delete:
        e_mol.RemoveAtom(atom_idx)
    return get_largest_fragment(e_mol.GetMol())
예제 #2
0
def _matched_atom_indices(mol, smarts):
    """Return the set of atom indices of `mol` that occur in any match of `smarts`."""
    pattern = rdmolfiles.MolFromSmarts(smarts)
    return {idx for match in mol.GetSubstructMatches(pattern) for idx in match}


def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Note that `mol` is modified in place: Gasteiger charges are computed
    and stereochemistry is assigned on it before the features are read.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative or
        `None`, it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or
        `out_size` when padding is requested), and `m` is the number of
        feats. The dtype is float64.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    """
    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(
        mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(
        mol)  # stored under _CIPCode, see doc for more info

    # Retrieve atom index locations of matches. Sets give constant-time
    # membership tests inside the per-atom loop below.
    hydrogen_donor_match = _matched_atom_indices(
        mol, "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    hydrogen_acceptor_match = _matched_atom_indices(
        mol, "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    acidic_match = _matched_atom_indices(
        mol, "[$([C,S](=[O,S,P])-[O;H1,-1])]")
    basic_match = _matched_atom_indices(
        mol, "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")

    # Get ring info
    ring = mol.GetRingInfo()

    # Loop-invariant: the candidate hybridization states.
    hybridizations = list(rdchem.HybridizationType.names.values())

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)

        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), [
            'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
            'Sn', 'Se', 'other'
        ])
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(), hybridizations)
        atom_feats += one_hot(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(), [-3, -2, -1, 0, 1, 2, 3])
        # A NaN partial charge is replaced by 0.
        g_charge = float(atom.GetProp("_GasteigerCharge"))
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]
        atom_feats += [atom.GetIsAromatic()]

        # Membership in rings of size 3 through 8
        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality. Atoms without a usable `_CIPCode` get an all-False
        # R/S encoding. `Exception` (rather than a bare except) keeps that
        # best-effort fallback without swallowing KeyboardInterrupt or
        # SystemExit.
        try:
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        except Exception:
            cip_feats = [False, False]
        atom_feats += cip_feats + [atom.HasProp("_ChiralityPossible")]
        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    no_padding = out_size is None or out_size < 0
    if not no_padding and out_size < n_atoms:
        raise ValueError(
            '`out_size` (N={}) must be negative or larger than or '
            'equal to the number of atoms in the input molecules (N={}).'.
            format(out_size, n_atoms))

    # `np.float` was removed from NumPy; `np.float64` is the same dtype.
    feats_array = np.array(mol_feats, dtype=np.float64)
    if no_padding:
        return feats_array

    # 'empty' padding for `mol_feats`. Generate(s) feature matrix of same size for all mols
    # NOTE: len(mol_feats[0]) is the number of feats
    padded_mol_feats = np.zeros((out_size, len(mol_feats[0])),
                                dtype=np.float64)
    padded_mol_feats[:n_atoms] = feats_array
    return padded_mol_feats