Beispiel #1
0
def write_vina_pdbqt(mol, directory, flexible=True, name_id=None):
    """Save one molecule as a PDBQT file inside `directory`.

    The file name is built from the optional `name_id` prefix and the
    molecule title, e.g. ``0_ZINC123456.pdbqt`` or just ``ZINC123456.pdbqt``
    when no `name_id` is given. Every run of non alpha-numeric characters in
    the title is collapsed into a single underscore. Use `flexible=False` for
    proteins to avoid encoding torsions.

    Returns the path of the written file.
    """
    prefix = '' if name_id is None else str(name_id)
    safe_title = re.sub('[^A-Za-z0-9]+', '_', mol.title)
    # empty parts (e.g. a missing name_id) are dropped before joining
    name_parts = [part for part in (prefix, safe_title) if part]
    mol_file = os.path.join(directory, '_'.join(name_parts) + '.pdbqt')

    if not is_openbabel_molecule(mol):
        kwargs = {'flexible': flexible}
    elif flexible:
        # auto bonding (b), preserve atom indices (p) and Hs (h)
        kwargs = {'opt': {'b': None, 'p': None, 'h': None}}
    else:
        # for proteins write rigid mol (r) and combine all frags in one (c)
        kwargs = {'opt': {'r': None, 'c': None, 'h': None}}

    mol.write('pdbqt', mol_file, overwrite=True, **kwargs)
    return mol_file
Beispiel #2
0
def shuffle_mol(mol):
    """Return `mol.clone` with its atoms renumbered in a random order."""
    shuffled = mol.clone
    permutation = list(range(len(mol.atoms)))
    shuffle(permutation)
    if not is_openbabel_molecule(mol):
        shuffled.Mol = oddt.toolkits.rdk.Chem.RenumberAtoms(shuffled.Mol,
                                                            permutation)
        return shuffled
    # the OpenBabel call is fed 1-based atom indices
    shuffled.OBMol.RenumberAtoms([index + 1 for index in permutation])
    return shuffled
Beispiel #3
0
def write_vina_pdbqt(mol, directory, flexible=True, name_id=None):
    """Write single PDBQT molecule to a given directory. For proteins use
    `flexible=False` to avoid encoding torsions. Additionally a name ID can
    be prefixed to the file name to avoid conflicts.

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Molecule to be written.

    directory : str
        Directory in which the PDBQT file is created.

    flexible : bool (default=True)
        Write a flexible molecule. Use `False` for rigid molecules
        (proteins).

    name_id : object or None (default=None)
        Optional prefix of the file name; it is converted with `str`.

    Returns
    -------
    mol_file : str
        Path of the written PDBQT file.
    """
    if name_id is None:
        name_id = ''

    # We expect name such as 0_ZINC123456.pdbqt or simply ZINC123456.pdbqt if no
    # name_id is specified. All non alpha-numeric signs are replaced with underscore.
    mol_file = ('_'.join(
        filter(None, [str(name_id),
                      re.sub('[^A-Za-z0-9]+', '_', mol.title)])) + '.pdbqt')
    # prepend path to filename
    mol_file = os.path.join(directory, mol_file)

    if is_openbabel_molecule(mol):
        if flexible:
            # auto bonding (b), preserve atom indices (p) and Hs (h)
            kwargs = {'opt': {'b': None, 'p': None, 'h': None}}
        else:
            # for proteins write rigid mol (r) and combine all frags in one (c)
            kwargs = {'opt': {'r': None, 'c': None, 'h': None}}

    else:
        kwargs = {'flexible': flexible}

    # HACK: fix OB 2.3.2 PDBQT bugs
    needs_ob23_fix = False
    if not flexible and is_openbabel_molecule(mol):
        # Compare versions numerically. As plain strings '2.10.0' would be
        # sorted before '2.4.0'. The OB version is only read for OB molecules.
        ob_version = tuple(
            int(number)
            for number in re.findall(r'\d+', oddt.toolkits.ob.__version__)[:3])
        needs_ob23_fix = ob_version < (2, 4, 0)

    if needs_ob23_fix:
        with open(mol_file, 'w') as f:
            for line in mol.write('pdbqt', overwrite=True,
                                  **kwargs).split('\n'):
                # remove OB 2.3 ROOT/ENDROOT tags
                if line in ['ROOT', 'ENDROOT']:
                    continue
                elif line[:7] == 'TORSDOF':
                    # the torsion record is replaced with a terminator
                    f.write('TER\n')
                else:
                    f.write(line + '\n')
    else:
        mol.write('pdbqt', mol_file, overwrite=True, **kwargs)
    return mol_file
Beispiel #4
0
def shuffle_mol(mol):
    """Shuffle the atom order of a molecule and return the reordered copy."""
    result = mol.clone
    atom_order = list(range(len(mol.atoms)))
    shuffle(atom_order)
    if not is_openbabel_molecule(mol):
        result.Mol = oddt.toolkits.rdk.Chem.RenumberAtoms(result.Mol,
                                                          atom_order)
        return result
    # the OpenBabel call is fed 1-based atom indices
    one_based_order = [position + 1 for position in atom_order]
    result.OBMol.RenumberAtoms(one_based_order)
    return result
Beispiel #5
0
def write_vina_pdbqt(mol, directory, flexible=True, name_id=None):
    """Write single PDBQT molecule to a given directory. For proteins use
    `flexible=False` to avoid encoding torsions. Additionally a name ID can
    be prefixed to the file name to avoid conflicts.

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Molecule to be written.

    directory : str
        Directory in which the PDBQT file is created.

    flexible : bool (default=True)
        Write a flexible molecule. Use `False` for rigid molecules
        (proteins).

    name_id : object or None (default=None)
        Optional prefix of the file name; it is converted with `str`.

    Returns
    -------
    mol_file : str
        Path of the written PDBQT file.
    """
    if name_id is None:
        name_id = ''

    # We expect name such as 0_ZINC123456.pdbqt or simply ZINC123456.pdbqt if no
    # name_id is specified. All non alpha-numeric signs are replaced with underscore.
    mol_file = ('_'.join(filter(None, [str(name_id),
                                       re.sub('[^A-Za-z0-9]+', '_', mol.title)]
                                )) + '.pdbqt')
    # prepend path to filename
    mol_file = os.path.join(directory, mol_file)

    if is_openbabel_molecule(mol):
        if flexible:
            # auto bonding (b), preserve atom names (n) indices (p) and Hs (h)
            kwargs = {'opt': {'b': None, 'p': None, 'h': None, 'n': None}}
        else:
            # for proteins write rigid mol (r) and combine all frags in one (c)
            kwargs = {'opt': {'r': None, 'c': None, 'h': None}}

    else:
        kwargs = {'flexible': flexible}

    # HACK: fix OB 2.3.2 PDBQT bugs
    needs_ob23_fix = False
    if not flexible and is_openbabel_molecule(mol):
        # Compare versions numerically. As plain strings '2.10.0' would be
        # sorted before '2.4.0'. The OB version is only read for OB molecules.
        ob_version = tuple(
            int(number)
            for number in re.findall(r'\d+', oddt.toolkits.ob.__version__)[:3])
        needs_ob23_fix = ob_version < (2, 4, 0)

    if needs_ob23_fix:
        with open(mol_file, 'w') as f:
            for line in mol.write('pdbqt', overwrite=True, **kwargs).split('\n'):
                # remove OB 2.3 ROOT/ENDROOT tags
                if line in ['ROOT', 'ENDROOT']:
                    continue
                elif line[:7] == 'TORSDOF':
                    # the torsion record is replaced with a terminator
                    f.write('TER\n')
                else:
                    f.write(line + '\n')
    else:
        mol.write('pdbqt', mol_file, overwrite=True, **kwargs)
    return mol_file
Beispiel #6
0
def get_atom_environments(mol, root_atom_idx, depth):
    """Collect circular atom environments around a root atom.

    Heavy-atom indices are gathered shell by shell (one list per bond
    distance from the root) up to `depth` bonds. Hydrogens are skipped.

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Molecule object containing environments

    root_atom_idx : int
        0-based index of root atom for all environments

    depth : int
        Maximum depth of environments to return

    Returns
    -------
    envs: list (size = depth + 1)
        List of atoms at each respective environment depth
    """
    if not is_openbabel_molecule(mol):
        envs = [[root_atom_idx]]
        seen = [root_atom_idx]
        for _ in range(depth):
            shell = []
            # expand the most recently completed shell by one bond
            for atom_idx in envs[-1]:
                atom = mol.Mol.GetAtomWithIdx(atom_idx)
                for neighbor in atom.GetNeighbors():
                    if neighbor.GetAtomicNum() == 1:
                        continue
                    n_idx = neighbor.GetIdx()
                    if n_idx in seen or n_idx in shell:
                        continue
                    shell.append(n_idx)
                    seen.append(n_idx)
            envs.append(shell)
        return envs

    shells = OrderedDict((level, []) for level in range(depth + 1))
    previous_depth = 0
    # the BFS iterator is started from the 1-based index of the root atom
    bfs_iter = oddt.toolkits.ob.ob.OBMolAtomBFSIter(mol.OBMol,
                                                    root_atom_idx + 1)
    for atom, bfs_depth in bfs_iter:
        # FIX for disconnected fragments in OB
        if (bfs_depth > depth + 1 or
                previous_depth > bfs_depth or
                previous_depth == bfs_depth == 1):
            break
        previous_depth = bfs_depth
        if atom.GetAtomicNum() == 1:
            continue
        # store 0-based indices; shell number is the BFS depth minus one
        shells[bfs_depth - 1].append(atom.GetIdx() - 1)
    return list(shells.values())
Beispiel #7
0
def get_molecular_shingles(mol, depth=2, atom_idxs=None):
    """Get molecular shingles of given depth. They are equivalent to ECFP environments,
    but use SMILES as a representation for each environment.

    Parameters
    ----------
    mol: oddt.toolkit.Molecule instance
        Query molecule object

    depth: int (default=2)
        Bond depth of environment that is used for shingles generation

    atom_idxs: iterable of ints or None (default=None)
        Which atoms (0-based indices) to use for shingles generation.
        `None` means all atoms; an empty iterable yields no shingles.

    Returns
    -------
    shingles: list
        List of molecular shingles (SMILES of each atom environment), one
        per requested atom

    References
    ----------
        https://doi.org/10.1186/s13321-018-0321-8
    """
    # Test against None explicitly: `atom_idxs or ...` raises for numpy arrays
    # and would silently replace an explicit empty selection with all atoms.
    if atom_idxs is None:
        atom_idxs = range(len(mol.atoms))

    shingles = []
    for atom_idx in atom_idxs:
        # flatten the per-depth environments into one list of atom indices
        env = list(
            chain.from_iterable(
                get_atom_environments(mol, root_atom_idx=atom_idx,
                                      depth=depth)))
        if is_openbabel_molecule(mol):
            atom_idx_string = ' '.join(str(i + 1)
                                       for i in env)  # this is one-based
            # OB fragment smiles contains names and whitespaces
            fragment_smiles = mol.write('smi',
                                        opt={
                                            'c': None,
                                            'F': atom_idx_string
                                        }).strip().split()[0]
        else:
            fragment_smiles = oddt.toolkit.Chem.MolFragmentToSmiles(
                mol.Mol, atomsToUse=env, isomericSmiles=True)
        shingles.append(fragment_smiles)

    return shingles
Beispiel #8
0
def _ECFP_atom_repr(mol, idx, use_pharm_features=False):
    """Simple description of atoms used in ECFP/FCFP. Bonds are not described
    accounted for. Hydrogens are explicitly forbidden, they raise Exception.

    Reference:
    Rogers D, Hahn M. Extended-connectivity fingerprints. J Chem Inf Model.
    2010;50: 742-754. http://dx.doi.org/10.1021/ci100050t

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Input molecule for the FP calculations

    idx : int
        Root atom index (0-based).

    use_pharm_features : bool (default=False)
        Switch to use pharmacophoric features as atom representation instead of
        explicit atomic numbers etc.

    Returns
    -------
    atom_repr : tuple (size=6 or 7)
        Atom type desctiption or pharmacophoric features of atom.
    """
    if use_pharm_features:
        atom_dict = mol.atom_dict[idx]
        if atom_dict['atomicnum'] == 1:
            raise Exception('ECFP should not hash Hydrogens')
        return (int(atom_dict['isdonor']),
                int(atom_dict['isacceptor']),
                int(atom_dict['ishydrophobe']),
                int(atom_dict['isplus']),
                int(atom_dict['isminus']),
                int(atom_dict['isaromatic']))

    else:
        max_ring_size = 10  # dont catch macromolecular rings
        if is_openbabel_molecule(mol):
            atom = mol.OBMol.GetAtom(idx + 1)
            if atom.GetAtomicNum() == 1:
                raise Exception('ECFP should not hash Hydrogens')
            # OB 3.0 compatibility
            if hasattr(atom, 'GetHvyValence'):
                heavy_degree = atom.GetHvyValence()
            else:
                heavy_degree = atom.GetHvyDegree()
            if hasattr(atom, 'ImplicitHydrogenCount'):
                hs_count = atom.ImplicitHydrogenCount() + atom.ExplicitHydrogenCount()
            else:
                hs_count = atom.GetTotalDegree() - heavy_degree
            return (atom.GetAtomicNum(),
                    atom.GetIsotope(),
                    heavy_degree,
                    hs_count,
                    atom.GetFormalCharge(),
                    int(0 < atom.MemberOfRingSize() <= max_ring_size),
                    int(atom.IsAromatic()),)
        else:
            atom = mol.Mol.GetAtomWithIdx(idx)
            if atom.GetAtomicNum() == 1:
                raise Exception('ECFP should not hash Hydrogens')
            n_hs = atom.GetTotalNumHs(includeNeighbors=True)

            # get ring info for atom and check rign size
            isring = False
            if atom.IsInRing():
                # FIXME: this is not efficient, fixed by rdkit/rdkit#1859
                isring = any(atom.IsInRingSize(size)
                             for size in range(3, max_ring_size + 1))

            return (atom.GetAtomicNum(),
                    atom.GetIsotope(),
                    atom.GetTotalDegree() - n_hs,
                    n_hs,
                    atom.GetFormalCharge(),
                    int(isring),
                    int(atom.GetIsAromatic()),)
Beispiel #9
0
def _ECFP_atom_hash(mol, idx, depth=2, use_pharm_features=False,
                    atom_repr_dict=None):
    """Hash the circular environments of one atom, one hash per radius
    (0..depth bonds). Hydrogens are ignored during neighbor lookup.

    Reference:
    Rogers D, Hahn M. Extended-connectivity fingerprints. J Chem Inf Model.
    2010;50: 742-754. http://dx.doi.org/10.1021/ci100050t

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Input molecule for the FP calculations

    idx : int
        Root atom index (0-based).

    depth : int (default = 2)
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    use_pharm_features : bool (default=False)
        Switch to use pharmacophoric features as atom representation instead of
        explicit atomic numbers etc.

    atom_repr_dict : dict or None (default=None)
        Optional precomputed atom representations keyed by atom index. When
        `None` the representations are computed with `_ECFP_atom_repr`.

    Returns
    -------
    environment_hashes : list of ints
        Hashed environments for certain atom
    """
    if not is_openbabel_molecule(mol):
        envs = [[idx]]
        seen = [idx]
        for _ in range(depth):
            shell = []
            # expand the most recently completed shell by one bond
            for atom_idx in envs[-1]:
                atom = mol.Mol.GetAtomWithIdx(atom_idx)
                for neighbor in atom.GetNeighbors():
                    if neighbor.GetAtomicNum() == 1:
                        continue
                    n_idx = neighbor.GetIdx()
                    if n_idx in seen or n_idx in shell:
                        continue
                    shell.append(n_idx)
                    seen.append(n_idx)
            envs.append(shell)
    else:
        shells = OrderedDict((level, []) for level in range(depth + 1))
        previous_depth = 0
        bfs_iter = oddt.toolkits.ob.ob.OBMolAtomBFSIter(mol.OBMol, idx + 1)
        for atom, bfs_depth in bfs_iter:
            # FIX for disconnected fragments in OB
            if (bfs_depth > depth + 1 or
                    previous_depth > bfs_depth or
                    previous_depth == bfs_depth == 1):
                break
            previous_depth = bfs_depth
            if atom.GetAtomicNum() == 1:
                continue
            shells[bfs_depth - 1].append(atom.GetIdx() - 1)
        envs = list(shells.values())

    # cumulative environments: radius 0, radius 0..1, ..., radius 0..depth
    atom_env = [list(chain.from_iterable(envs[:radius]))
                for radius in range(1, depth + 2)]
    largest_env = atom_env[-1]

    # Get atom representation only once, pull indices from largest env
    if atom_repr_dict is None:
        atom_repr = [_ECFP_atom_repr(mol, aidx,
                                     use_pharm_features=use_pharm_features)
                     for aidx in largest_env]
    elif isinstance(atom_repr_dict, dict):
        atom_repr = [atom_repr_dict[aidx] for aidx in largest_env]
    else:
        raise ValueError('`atom_repr_dict` must be a dictionary, as atom idxs '
                         'do not need to be continuous (eg. missing Hs).')

    # Each smaller environment is a prefix of the largest one, so its atom
    # representations are the leading entries of `atom_repr`.
    return [hash32(tuple(sorted(atom_repr[:len(layer)])))
            for layer in atom_env]
Beispiel #10
0
def _ECFP_atom_repr(mol, idx, use_pharm_features=False):
    """Simple description of atoms used in ECFP/FCFP. Bonds are not described
    accounted for. Hydrogens are explicitly forbidden, they raise Exception.

    Reference:
    Rogers D, Hahn M. Extended-connectivity fingerprints. J Chem Inf Model.
    2010;50: 742-754. http://dx.doi.org/10.1021/ci100050t

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Input molecule for the FP calculations

    idx : int
        Root atom index (0-based).

    use_pharm_features : bool (default=False)
        Switch to use pharmacophoric features as atom representation instead of
        explicit atomic numbers etc.

    Returns
    -------
    atom_repr : tuple (size=6 or 7)
        Atom type desctiption or pharmacophoric features of atom.
    """
    if use_pharm_features:
        atom_dict = mol.atom_dict[idx]
        if atom_dict['atomicnum'] == 1:
            raise Exception('ECFP should not hash Hydrogens')
        return (int(atom_dict['isdonor']),
                int(atom_dict['isacceptor']),
                int(atom_dict['ishydrophobe']),
                int(atom_dict['isplus']),
                int(atom_dict['isminus']),
                int(atom_dict['isaromatic']))

    else:
        max_ring_size = 10  # dont catch macromolecular rings
        if is_openbabel_molecule(mol):
            atom = mol.OBMol.GetAtom(idx + 1)
            if atom.GetAtomicNum() == 1:
                raise Exception('ECFP should not hash Hydrogens')
            return (atom.GetAtomicNum(),
                    atom.GetIsotope(),
                    atom.GetHvyValence(),
                    atom.ImplicitHydrogenCount() + atom.ExplicitHydrogenCount(),
                    atom.GetFormalCharge(),
                    int(0 < atom.MemberOfRingSize() <= max_ring_size),
                    int(atom.IsAromatic()),)
        else:
            atom = mol.Mol.GetAtomWithIdx(idx)
            if atom.GetAtomicNum() == 1:
                raise Exception('ECFP should not hash Hydrogens')
            n_hs = atom.GetTotalNumHs(includeNeighbors=True)

            # get ring info for atom and check rign size
            isring = False
            if atom.IsInRing():
                # FIXME: this is not efficient, fixed by rdkit/rdkit#1859
                isring = any(atom.IsInRingSize(size)
                             for size in range(3, max_ring_size + 1))

            return (atom.GetAtomicNum(),
                    atom.GetIsotope(),
                    atom.GetTotalDegree() - n_hs,
                    n_hs,
                    atom.GetFormalCharge(),
                    int(isring),
                    int(atom.GetIsAromatic()),)
Beispiel #11
0
def _ECFP_atom_hash(mol,
                    idx,
                    depth=2,
                    use_pharm_features=False,
                    atom_repr_dict=None):
    """Hash the circular environments of one atom, one hash per radius
    (0..depth bonds). Hydrogens are ignored during neighbor lookup.

    Reference:
    Rogers D, Hahn M. Extended-connectivity fingerprints. J Chem Inf Model.
    2010;50: 742-754. http://dx.doi.org/10.1021/ci100050t

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Input molecule for the FP calculations

    idx : int
        Root atom index (0-based).

    depth : int (default = 2)
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    use_pharm_features : bool (default=False)
        Switch to use pharmacophoric features as atom representation instead of
        explicit atomic numbers etc.

    atom_repr_dict : dict or None (default=None)
        Optional precomputed atom representations keyed by atom index. When
        `None` the representations are computed with `_ECFP_atom_repr`.

    Returns
    -------
    environment_hashes : list of ints
        Hashed environments for certain atom
    """
    if is_openbabel_molecule(mol):
        by_level = OrderedDict((level, []) for level in range(depth + 1))
        prev_level = 0
        walker = oddt.toolkits.ob.ob.OBMolAtomBFSIter(mol.OBMol, idx + 1)
        for atom, level in walker:
            # FIX for disconnected fragments in OB
            beyond_depth = level > depth + 1
            went_backwards = prev_level > level
            second_root = prev_level == 1 and level == 1
            if beyond_depth or went_backwards or second_root:
                break
            prev_level = level
            if atom.GetAtomicNum() != 1:
                by_level[level - 1].append(atom.GetIdx() - 1)
        envs = list(by_level.values())
    else:
        envs = [[idx]]
        visited = [idx]
        radius = 1
        while radius <= depth:
            ring = []
            for atom_idx in envs[radius - 1]:
                neighbors = mol.Mol.GetAtomWithIdx(atom_idx).GetNeighbors()
                for neighbor in neighbors:
                    if neighbor.GetAtomicNum() == 1:
                        continue
                    n_idx = neighbor.GetIdx()
                    if not (n_idx in visited or n_idx in ring):
                        ring.append(n_idx)
                        visited.append(n_idx)
            envs.append(ring)
            radius += 1

    # there are depth + 1 cumulative environments (radius 0 .. depth)
    atom_env = []
    for count in range(1, depth + 2):
        merged = []
        for shell in envs[:count]:
            merged.extend(shell)
        atom_env.append(merged)

    # Get atom representation only once, pull indices from largest env
    if atom_repr_dict is None:
        atom_repr = []
        for aidx in atom_env[-1]:
            atom_repr.append(
                _ECFP_atom_repr(mol, aidx,
                                use_pharm_features=use_pharm_features))
    elif isinstance(atom_repr_dict, dict):
        atom_repr = [atom_repr_dict[aidx] for aidx in atom_env[-1]]
    else:
        raise ValueError('`atom_repr_dict` must be a dictionary, as atom idxs '
                         'do not need to be continuous (eg. missing Hs).')

    # Get atom invariants: every environment is a prefix of the largest one
    out_hash = []
    for layer in atom_env:
        invariant = tuple(sorted(atom_repr[:len(layer)]))
        out_hash.append(hash32(invariant))
    return out_hash
Beispiel #12
0
def _ECFP_atom_repr(mol, idx, use_pharm_features=False):
    """Simple description of atoms used in ECFP/FCFP. Bonds are not described
    accounted for. Hydrogens are explicitly forbidden, they raise Exception.

    Reference:
    Rogers D, Hahn M. Extended-connectivity fingerprints. J Chem Inf Model.
    2010;50: 742-754. http://dx.doi.org/10.1021/ci100050t

    Parameters
    ----------
    mol : oddt.toolkit.Molecule object
        Input molecule for the FP calculations

    idx : int
        Root atom index (0-based).

    use_pharm_features : bool (default=False)
        Switch to use pharmacophoric features as atom representation instead of
        explicit atomic numbers etc.

    Returns
    -------
    atom_repr : tuple (size=6 or 7)
        Atom type desctiption or pharmacophoric features of atom.
    """
    if use_pharm_features:
        atom_dict = mol.atom_dict[idx]
        if atom_dict['atomicnum'] == 1:
            raise Exception('ECFP should not hash Hydrogens')
        return (int(atom_dict['isdonor']), int(atom_dict['isacceptor']),
                int(atom_dict['ishydrophobe']), int(atom_dict['isplus']),
                int(atom_dict['isminus']), int(atom_dict['isaromatic']))

    else:
        if is_openbabel_molecule(mol):
            atom = mol.OBMol.GetAtom(idx + 1)
            if atom.GetAtomicNum() == 1:
                raise Exception('ECFP should not hash Hydrogens')
            return (
                atom.GetAtomicNum(),
                atom.GetIsotope(),
                atom.GetHvyValence(),
                atom.ImplicitHydrogenCount() + atom.ExplicitHydrogenCount(),
                atom.GetFormalCharge(),
                int(atom.IsInRing()),
                int(atom.IsAromatic()),
            )
        else:
            atom = mol.Mol.GetAtomWithIdx(idx)
            if atom.GetAtomicNum() == 1:
                raise Exception('ECFP should not hash Hydrogens')
            n_hs = atom.GetTotalNumHs(includeNeighbors=True)
            return (
                atom.GetAtomicNum(),
                atom.GetIsotope(),
                atom.GetTotalDegree() - n_hs,
                n_hs,
                atom.GetFormalCharge(),
                int(atom.IsInRing()),
                int(atom.GetIsAromatic()),
            )
Beispiel #13
0
def rmsd(ref, mol, ignore_h=True, method=None, normalize=False):
    """Computes root mean square deviation (RMSD) between two molecules
    (including or excluding Hydrogens). No symmetry checks are performed
    unless `method='min_symmetry'` is used.

    Parameters
    ----------
    ref : oddt.toolkit.Molecule object
        Reference molecule for the RMSD calculation

    mol : oddt.toolkit.Molecule object
        Query molecule for RMSD calculation

    ignore_h : bool (default=True)
        Flag indicating to ignore Hydrogen atoms while performing RMSD
        calculation. This toggle works only with 'hungarian' method and without
        sorting (method=None).

    method : str (default=None)
        The method to be used for atom assignment between ref and mol.
        None means that direct matching is applied, which is the default
        behavior.
        Available methods:
            - canonize - match heavy atoms using canonical ordering (it forces
            ignoring H's)
            - hungarian - minimize RMSD using Hungarian algorithm
            - min_symmetry - makes multiple molecule-molecule matches and finds
            minimal RMSD (the slowest). Hydrogens are ignored.

    normalize : bool (default=False)
        Normalize RMSD by square root of rot. bonds of the query molecule
        (`mol.num_rotors`). Applied for every method.

    Returns
    -------
    rmsd : float
        RMSD between two molecules

    Raises
    ------
    ValueError
        If the molecules have a different number of (matched) atoms or no
        match between molecules is found by 'min_symmetry'.
    """

    if method == 'canonize':
        ref_atoms = ref.coords[ref.canonic_order]
        mol_atoms = mol.coords[mol.canonic_order]
    elif method == 'hungarian':
        mol_map = []
        ref_map = []
        # atoms are assigned to each other separately within each atom type
        for a_type in np.unique(mol.atom_dict['atomtype']):
            if a_type != 'H' or not ignore_h:
                mol_idx = np.argwhere(mol.atom_dict['atomtype'] == a_type).flatten()
                ref_idx = np.argwhere(ref.atom_dict['atomtype'] == a_type).flatten()
                if len(mol_idx) != len(ref_idx):
                    raise ValueError('Unequal number of atoms type: %s' % a_type)
                if len(mol_idx) == 1:
                    # a single atom of this type needs no assignment
                    mol_map.append(mol_idx)
                    ref_map.append(ref_idx)
                    continue
                M = distance(mol.atom_dict['coords'][mol_idx],
                             ref.atom_dict['coords'][ref_idx])
                # shifting rows/columns by constants does not change the
                # optimal assignment
                M = M - M.min(axis=0) - M.min(axis=1).reshape(-1, 1)
                tmp_mol, tmp_ref = linear_sum_assignment(M)
                mol_map.append(mol_idx[tmp_mol])
                ref_map.append(ref_idx[tmp_ref])
        mol_atoms = mol.atom_dict['coords'][np.hstack(mol_map)]
        ref_atoms = ref.atom_dict['coords'][np.hstack(ref_map)]
    elif method == 'min_symmetry':
        min_rmsd = None
        ref_atoms = ref.atom_dict[ref.atom_dict['atomicnum'] != 1]['coords']
        mol_atoms = mol.atom_dict[mol.atom_dict['atomicnum'] != 1]['coords']
        # safety switch to check if number of heavy atoms match; on mismatch
        # execution falls through to the ValueError at the end
        if ref_atoms.shape == mol_atoms.shape:
            # match mol to ref, generate all matches to find best RMSD
            matches = oddt.toolkit.Smarts(ref).findall(mol, unique=False)
            if not matches:
                raise ValueError('Could not find any match between molecules.')
            # calculate RMSD between all matches and retain the smallest
            for match in matches:
                match = np.array(match, dtype=int)
                if is_openbabel_molecule(mol):
                    match -= 1  # OB has 1-based indices
                tmp_dict = mol.atom_dict[match]
                mol_atoms = tmp_dict[tmp_dict['atomicnum'] != 1]['coords']
                # following should not happen, although safety check is left
                if mol_atoms.shape != ref_atoms.shape:
                    raise Exception('Molecular match got wrong number of atoms.')
                match_rmsd = np.sqrt(
                    ((mol_atoms - ref_atoms)**2).sum(axis=-1).mean())
                if min_rmsd is None or match_rmsd < min_rmsd:
                    min_rmsd = match_rmsd
            # honor `normalize` here as well, like the other methods do
            if normalize:
                min_rmsd /= np.sqrt(mol.num_rotors)
            return min_rmsd
    elif ignore_h:
        mol_atoms = mol.coords[mol.atom_dict['atomicnum'] != 1]
        ref_atoms = ref.coords[ref.atom_dict['atomicnum'] != 1]
    else:
        mol_atoms = mol.coords
        ref_atoms = ref.coords
    if mol_atoms.shape == ref_atoms.shape:
        result = np.sqrt(((mol_atoms - ref_atoms)**2).sum(axis=-1).mean())
        if normalize:
            result /= np.sqrt(mol.num_rotors)
        return result
    # at this point raise an exception
    raise ValueError('Unequal number of atoms in molecules (%i and %i)'
                     % (len(mol_atoms), len(ref_atoms)))
Beispiel #14
0
def rmsd(ref, mol, ignore_h=True, method=None, normalize=False):
    """Computes root mean square deviation (RMSD) between two molecules
    (including or excluding Hydrogens). No symmetry checks are performed
    unless `method='min_symmetry'` is requested.

    Parameters
    ----------
    ref : oddt.toolkit.Molecule object
        Reference molecule for the RMSD calculation

    mol : oddt.toolkit.Molecule object
        Query molecule for RMSD calculation

    ignore_h : bool (default=True)
        Flag indicating to ignore Hydrogen atoms while performing RMSD
        calculation. This toggle works only with 'hungarian' method and without
        sorting (method=None).

    method : str (default=None)
        The method to be used for atom assignment between ref and mol.
        None means that direct matching is applied, which is the default
        behavior.
        Available methods:
            - canonize - match heavy atoms using canonical ordering (it forces
            ignoring H's)
            - hungarian - minimize RMSD using Hungarian algorithm
            - min_symmetry - makes multiple molecule-molecule matches and finds
            minimal RMSD (the slowest). Hydrogens are ignored.

    normalize : bool (default=False)
        Normalize RMSD by square root of rot. bonds. Note that the
        'min_symmetry' method returns its result before this step, so the
        normalization is not applied to it.

    Returns
    -------
    rmsd : float
        RMSD between two molecules

    Raises
    ------
    ValueError
        If the numbers of compared atoms differ (overall, or per atom type for
        the 'hungarian' method), or if 'min_symmetry' finds no match.
    Exception
        If a 'min_symmetry' match yields an unexpected number of heavy atoms.
    """

    if method == 'canonize':
        ref_atoms = ref.coords[ref.canonic_order]
        mol_atoms = mol.coords[mol.canonic_order]
    elif method == 'hungarian':
        mol_map = []
        ref_map = []
        # Walk over atom types present in EITHER molecule. Looking only at
        # the types of `mol` would silently skip atoms whose type occurs
        # solely in `ref` and report an RMSD for mismatched molecules.
        atom_types = np.union1d(mol.atom_dict['atomtype'],
                                ref.atom_dict['atomtype'])
        for a_type in atom_types:
            if ignore_h and a_type == 'H':
                continue
            mol_idx = np.argwhere(
                mol.atom_dict['atomtype'] == a_type).flatten()
            ref_idx = np.argwhere(
                ref.atom_dict['atomtype'] == a_type).flatten()
            if len(mol_idx) != len(ref_idx):
                raise ValueError('Unequal number of atoms type: %s' %
                                 a_type)
            if len(mol_idx) == 1:
                # a single atom of this type - the assignment is trivial
                mol_map.append(mol_idx)
                ref_map.append(ref_idx)
                continue
            # pairwise distance matrix used as the assignment cost
            M = distance(mol.atom_dict['coords'][mol_idx],
                         ref.atom_dict['coords'][ref_idx])
            M = M - M.min(axis=0) - M.min(axis=1).reshape(-1, 1)
            tmp_mol, tmp_ref = linear_sum_assignment(M)
            mol_map.append(mol_idx[tmp_mol])
            ref_map.append(ref_idx[tmp_ref])
        mol_atoms = mol.atom_dict['coords'][np.hstack(mol_map)]
        ref_atoms = ref.atom_dict['coords'][np.hstack(ref_map)]
    elif method == 'min_symmetry':
        min_rmsd = None
        ref_atoms = ref.atom_dict[ref.atom_dict['atomicnum'] != 1]['coords']
        mol_atoms = mol.atom_dict[mol.atom_dict['atomicnum'] != 1]['coords']
        # safety switch to check if number of heavy atoms match; when it does
        # not, fall through to the ValueError at the end of the function
        if ref_atoms.shape == mol_atoms.shape:
            # match mol to ref, generate all matches to find best RMSD
            matches = oddt.toolkit.Smarts(ref).findall(mol, unique=False)
            if not matches:
                raise ValueError('Could not find any match between molecules.')
            # calculate RMSD between all matches and retain the smallest
            for match in matches:
                match = np.array(match, dtype=int)
                if is_openbabel_molecule(mol):
                    match -= 1  # OB has 1-based indices
                tmp_dict = mol.atom_dict[match]
                mol_atoms = tmp_dict[tmp_dict['atomicnum'] != 1]['coords']
                # following should not happen, although safety check is left
                if mol_atoms.shape != ref_atoms.shape:
                    raise Exception(
                        'Molecular match got wrong number of atoms.')
                match_rmsd = np.sqrt(
                    ((mol_atoms - ref_atoms)**2).sum(axis=-1).mean())
                if min_rmsd is None or match_rmsd < min_rmsd:
                    min_rmsd = match_rmsd
            return min_rmsd
    elif ignore_h:
        mol_atoms = mol.coords[mol.atom_dict['atomicnum'] != 1]
        ref_atoms = ref.coords[ref.atom_dict['atomicnum'] != 1]
    else:
        mol_atoms = mol.coords
        ref_atoms = ref.coords
    if mol_atoms.shape == ref_atoms.shape:
        result = np.sqrt(((mol_atoms - ref_atoms)**2).sum(axis=-1).mean())
        if normalize:
            result /= np.sqrt(mol.num_rotors)
        return result
    # at this point raise an exception
    raise ValueError('Unequal number of atoms in molecules (%i and %i)' %
                     (len(mol_atoms), len(ref_atoms)))
Beispiel #15
0
    def dock(self, ligands, protein=None):
        """Automated docking procedure.

        Each ligand is written as PDBQT into a temporary directory, docked
        with the Vina executable and the resulting poses are copied onto
        clones of the source ligand. The temporary directory is removed on
        exit, also when an error interrupts the run.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to dock

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default one
            is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            Array of ligands (scores are stored in mol.data method). When it
            can be computed, RMSD to the input pose is stored under
            'vina_rmsd_input' and 'vina_rmsd_input_min'.

        Raises
        ------
        IOError
            If no receptor file is set.
        Exception
            If the Vina process fails and `skip_bad_mols` is not set.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # try/finally guarantees the temporary directory does not leak when
        # any step below raises
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                # drop the '.pdbqt' suffix to build the output file name
                ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
                try:
                    scores = parse_vina_docking_output(
                        subprocess.check_output([
                            self.executable, '--receptor', self.protein_file,
                            '--ligand', ligand_file, '--out', ligand_outfile
                        ] + self.params + ['--cpu', str(self.n_cpu)],
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # 'replace' keeps non-ASCII output from raising a
                    # UnicodeDecodeError that would mask the Vina failure
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue  # TODO: print some warning message
                    else:
                        raise Exception('Autodock Vina failed. Command: "%s"' %
                                        ' '.join(e.cmd))

                # docked conformations may have wrong connectivity - use
                # source ligand
                if is_openbabel_molecule(ligand):
                    # NOTE: this is a plain string comparison of versions
                    if oddt.toolkits.ob.__version__ >= '2.4.0':
                        # find the order of PDBQT atoms assigned by OpenBabel
                        with open(ligand_file) as f:
                            write_order = [
                                int(line[7:12].strip()) for line in f
                                if line[:4] == 'ATOM'
                            ]
                        new_order = sorted(range(len(write_order)),
                                           key=write_order.__getitem__)
                        # OBMol has 1 based idx
                        new_order = [i + 1 for i in new_order]

                        assert len(new_order) == len(ligand.atoms)
                    else:
                        # Openbabel 2.3.2 does not support preserving atom
                        # order. We read back the PDBQT ligand to get
                        # "correct" bonding.
                        ligand = next(
                            oddt.toolkit.readfile('pdbqt', ligand_file))
                        if 'REMARK' in ligand.data:
                            del ligand.data['REMARK']

                docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
                for docked_ligand, score in zip(docked_ligands, scores):
                    # Renumber atoms to match the input ligand
                    if (is_openbabel_molecule(docked_ligand)
                            and oddt.toolkits.ob.__version__ >= '2.4.0'):
                        docked_ligand.OBMol.RenumberAtoms(new_order)
                    # HACK: copy docked coordinates onto source ligand
                    # We assume that the order of atoms match between ligands
                    clone = ligand.clone
                    clone.clone_coords(docked_ligand)
                    clone.data.update(score)

                    # Calculate RMSD to the input pose. This is best-effort:
                    # a failure leaves the RMSD fields unset but the docked
                    # pose is still returned.
                    try:
                        clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                        clone.data['vina_rmsd_input_min'] = rmsd(
                            ligand, clone, method='min_symmetry')
                    except Exception:
                        pass
                    output_array.append(clone)
        finally:
            rmtree(ligand_dir)
        return output_array
Beispiel #16
0
    def dock(self, ligands, protein=None):
        """Automated docking procedure.

        Every ligand is saved as a PDBQT file in a temporary directory and
        docked with the Vina executable; docked coordinates are then copied
        onto clones of the input ligand. The temporary directory is deleted
        before returning and also when an exception is raised.

        Parameters
        ----------
        ligands: iterable of oddt.toolkit.Molecule objects
            Ligands to dock

        protein: oddt.toolkit.Molecule object or None
            Protein object to be used. If None, then the default one
            is used, else the protein is new default.

        Returns
        -------
        ligands : array of oddt.toolkit.Molecule objects
            Array of ligands (scores are stored in mol.data method). RMSD to
            the input pose is stored under 'vina_rmsd_input' and
            'vina_rmsd_input_min'.

        Raises
        ------
        IOError
            If no receptor file is set.
        Exception
            If the Vina process fails and `skip_bad_mols` is not set. Errors
            raised by the RMSD calculation are propagated as well.
        """
        if protein:
            self.set_protein(protein)
        if not self.protein_file:
            raise IOError("No receptor.")
        if is_molecule(ligands):
            ligands = [ligands]
        ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
        output_array = []
        # Clean up the temporary directory no matter how the loop ends;
        # without this a failed docking or RMSD error would leave it on disk.
        try:
            for n, ligand in enumerate(ligands):
                check_molecule(ligand, force_coords=True)
                ligand_file = write_vina_pdbqt(ligand, ligand_dir, name_id=n)
                # replace the '.pdbqt' suffix with '_out.pdbqt'
                ligand_outfile = ligand_file[:-6] + '_out.pdbqt'
                try:
                    scores = parse_vina_docking_output(
                        subprocess.check_output([self.executable, '--receptor',
                                                 self.protein_file,
                                                 '--ligand', ligand_file,
                                                 '--out', ligand_outfile] +
                                                self.params +
                                                ['--cpu', str(self.n_cpu)],
                                                stderr=subprocess.STDOUT))
                except subprocess.CalledProcessError as e:
                    # tolerate non-ASCII bytes so reporting the failure can
                    # not itself raise UnicodeDecodeError
                    sys.stderr.write(e.output.decode('ascii', 'replace'))
                    if self.skip_bad_mols:
                        continue  # TODO: print some warning message
                    else:
                        raise Exception('Autodock Vina failed. Command: "%s"' %
                                        ' '.join(e.cmd))

                # docked conformations may have wrong connectivity - use
                # source ligand
                if is_openbabel_molecule(ligand):
                    # NOTE: versions are compared as plain strings here
                    if oddt.toolkits.ob.__version__ >= '2.4.0':
                        # find the order of PDBQT atoms assigned by OpenBabel
                        with open(ligand_file) as f:
                            write_order = [int(line[7:12].strip())
                                           for line in f
                                           if line[:4] == 'ATOM']
                        new_order = sorted(range(len(write_order)),
                                           key=write_order.__getitem__)
                        # OBMol has 1 based idx
                        new_order = [i + 1 for i in new_order]

                        assert len(new_order) == len(ligand.atoms)
                    else:
                        # Openbabel 2.3.2 does not support preserving atom
                        # order. We read back the PDBQT ligand to get
                        # "correct" bonding.
                        ligand = next(
                            oddt.toolkit.readfile('pdbqt', ligand_file))
                        if 'REMARK' in ligand.data:
                            del ligand.data['REMARK']

                docked_ligands = oddt.toolkit.readfile('pdbqt', ligand_outfile)
                for docked_ligand, score in zip(docked_ligands, scores):
                    # Renumber atoms to match the input ligand
                    if (is_openbabel_molecule(docked_ligand) and
                            oddt.toolkits.ob.__version__ >= '2.4.0'):
                        docked_ligand.OBMol.RenumberAtoms(new_order)
                    # HACK: copy docked coordinates onto source ligand
                    # We assume that the order of atoms match between ligands
                    clone = ligand.clone
                    clone.clone_coords(docked_ligand)
                    clone.data.update(score)

                    # Calculate RMSD to the input pose
                    clone.data['vina_rmsd_input'] = rmsd(ligand, clone)
                    clone.data['vina_rmsd_input_min'] = rmsd(
                        ligand, clone, method='min_symmetry')
                    output_array.append(clone)
        finally:
            rmtree(ligand_dir)
        return output_array