Ejemplo n.º 1
0
def write_molecule(
    path,
    component: Component,
    remove_hs: bool = True,
    alt_names: bool = False,
    conf_type: ConformerType = ConformerType.Ideal,
):
    """Write a component to a file in the format implied by the file name.

    The text following the last "." of the path, lower-cased, selects the
    writer: ``sdf``/``mol``, ``pdb``, ``cif``/``mmcif``, ``cml``, ``xml``,
    ``xyz`` or ``json``. The CIF writer receives the path and creates the
    file itself; every other format is rendered to a string first and then
    written to ``path`` in text mode.

    NOTE(review): earlier documentation stated that
    ConformerType.AllConformers is supported only for PDB - confirm
    against the individual writers.

    Args:
        path (str|Path): Destination file. Its suffix determines the
            format to be used.
        component (Component): Component to be exported.
        remove_hs (bool, optional): Defaults to True. Whether or not
            hydrogens should be removed.
        alt_names (bool, optional): Defaults to False. Whether or not
            alternate names should be exported. Only forwarded to the
            PDB writer.
        conf_type (ConformerType, optional):
            Defaults to ConformerType.Ideal. Conformer type to be
            exported. Not forwarded to the CIF writer.

    Raises:
        CCDUtilsError: For unsupported format
    """
    if isinstance(path, Path):
        path = str(path)

    file_format = path.rsplit(".", 1)[-1].lower()

    # The CIF writer produces the file on its own, nothing to write here.
    if file_format in ("mmcif", "cif"):
        to_pdb_ccd_cif_file(path, component, remove_hs)
        return

    # Renderers are wrapped in lambdas so that only the selected one runs.
    renderers = {
        "sdf": lambda: to_sdf_str(component, remove_hs, conf_type),
        "mol": lambda: to_sdf_str(component, remove_hs, conf_type),
        "pdb": lambda: to_pdb_str(component, remove_hs, alt_names, conf_type),
        "cml": lambda: to_cml_str(component, remove_hs, conf_type),
        "xml": lambda: to_xml_str(component, remove_hs, conf_type),
        "xyz": lambda: to_xyz_str(component, remove_hs, conf_type),
        "json": lambda: json.dumps(
            to_json_dict(component, remove_hs, conf_type),
            sort_keys=True,
            indent=4,
        ),
    }

    if file_format not in renderers:
        raise CCDUtilsError("Unsupported file format: {}".format(file_format))

    content = renderers[file_format]()

    with open(path, "w") as handle:
        handle.write(content)
Ejemplo n.º 2
0
def to_sdf_str(
    component: Component,
    remove_hs: bool = True,
    conf_type: ConformerType = ConformerType.Ideal,
):
    """Render a component as SDF text, one record per exported conformer.

    Each record consists of a title line
    ``"<component id> - <conformer name> conformer"``, the stripped RDKit
    mol block and a ``$$$$`` terminator. When ConformerType.AllConformers
    is requested, the Model, Ideal and Computed conformers are attempted
    in that order; conformers rejected with "Bad Conformer Id" are
    skipped. Any other exception escaping the loop discards the records
    built so far and delegates to ``_to_sdf_str_fallback``.

    Args:
        component (Component): Component to be exported.
        remove_hs (bool, optional): Defaults to True.
        conf_type (ConformerType, optional): Defaults to ConformerType.Ideal.

    Raises:
        CCDUtilsError: In case the structure could not be exported.

    Returns:
        str: String representation of the component in the SDF format
    """
    mol_to_save, _, conf_type = _prepate_structure(component, remove_hs,
                                                   conf_type)

    if conf_type != ConformerType.AllConformers:
        conformers = [conf_type]
    else:
        conformers = [
            ConformerType.Model, ConformerType.Ideal, ConformerType.Computed
        ]

    sdf_lines = []

    try:
        for conformer in conformers:
            try:
                if conformer == ConformerType.AllConformers:
                    conformer_id = -1
                else:
                    conformer_id = component.get_conformer(conformer).GetId()

                title = f"{component.id} - {conformer.name} conformer"
                body = rdkit.Chem.MolToMolBlock(
                    mol_to_save, confId=conformer_id).strip()
                sdf_lines.extend([title, body, "$$$$\n"])
            except ValueError as error:
                # A missing conformer is skipped; anything else is an error.
                if str(error) != "Bad Conformer Id":
                    raise CCDUtilsError(f"Error writing SDF file - {error}")
    except Exception:
        # Best effort: let the fallback writer produce the whole output.
        sdf_lines = _to_sdf_str_fallback(mol_to_save, component.id,
                                         conformers)

    return "\n".join(sdf_lines)
Ejemplo n.º 3
0
def write_molecule(path,
                   component: Component,
                   remove_hs: bool = True,
                   alt_names: bool = False,
                   conf_type: ConformerType = ConformerType.Ideal):
    """Export molecule in a specified format. Presently supported formats
    are: PDB CCD CIF (*.cif, *.mmcif); Mol file (*.sdf, *.mol); Chemical
    Markup language (*.cml); PDB file (*.pdb); XYZ file (*.xyz);
    XML (*.xml); JSON (*.json).

    The text after the last '.' of the path, lower-cased, selects the
    format. The CIF writer is handed the path and writes the file itself;
    all other formats are rendered to a string and written in text mode.

    Args:
        path (str|os.PathLike): Path to the file. Extension determines
            format to be used.
        component (Component): Component to be exported
        remove_hs (bool, optional): Defaults to True. Whether or not
            hydrogens should be removed.
        alt_names (bool, optional): Defaults to False. Whether or not
            alternate names should be exported. Only used for PDB.
        conf_type (ConformerType, optional):
            Defaults to ConformerType.Ideal. Conformer type to be
            exported. Not forwarded to the CIF writer.

    Raises:
        CCDUtilsError: For unsupported format
    """
    # Imported locally so the block does not depend on module-level imports.
    import os

    # Accept pathlib.Path (or any os.PathLike) in addition to plain strings;
    # calling .split() directly on such objects would raise AttributeError.
    path = os.fspath(path)

    extension = path.split('.')[-1].lower()
    str_representation = ''

    if extension in ('sdf', 'mol'):
        str_representation = to_sdf_str(component, remove_hs, conf_type)
    elif extension == 'pdb':
        str_representation = to_pdb_str(component, remove_hs, alt_names,
                                        conf_type)
    elif extension in ('mmcif', 'cif'):
        # The CIF writer creates the file on its own.
        to_pdb_ccd_cif_file(path, component, remove_hs)
        return
    elif extension == 'cml':
        str_representation = to_cml_str(component, remove_hs, conf_type)
    elif extension == 'xml':
        str_representation = to_xml_str(component, remove_hs, conf_type)
    elif extension == 'xyz':
        str_representation = to_xyz_str(component, remove_hs, conf_type)
    elif extension == 'json':
        str_representation = json.dumps(to_json_dict(component, remove_hs,
                                                     conf_type),
                                        sort_keys=True,
                                        indent=4)
    else:
        raise CCDUtilsError('Unsupported file format: {}'.format(extension))

    with open(path, 'w') as f:
        f.write(str_representation)
Ejemplo n.º 4
0
def to_sdf_str(component: Component,
               remove_hs: bool = True,
               conf_type: ConformerType = ConformerType.Ideal):
    """Render a component as SDF text, one record per exported conformer.

    A record is the title ``'<component id> - <conformer name> conformer'``
    immediately followed by the RDKit mol block and a ``$$$$`` terminator.
    For ConformerType.AllConformers the Model, Ideal and Computed
    conformers are attempted in that order; conformers rejected with
    'Bad Conformer Id' are skipped. Any other exception escaping the loop
    hands the export over to ``_to_sdf_str_fallback`` together with a
    conformer-name to conformer-id mapping.

    Args:
        component (Component): Component to be exported.
        remove_hs (bool, optional): Defaults to True.
        conf_type (ConformerType, optional): Defaults to ConformerType.Ideal.

    Raises:
        CCDUtilsError: In case the structure could not be exported.

    Returns:
        str: String representation of the component in the SDF format
    """
    mol_to_save, _, conf_type = _prepate_structure(component, remove_hs,
                                                   conf_type)

    if conf_type != ConformerType.AllConformers:
        conformers = [conf_type]
    else:
        conformers = [
            ConformerType.Model, ConformerType.Ideal, ConformerType.Computed
        ]

    records = []

    try:
        for conformer in conformers:
            try:
                title = '{} - {} conformer'.format(component.id,
                                                   conformer.name)
                body = rdkit.Chem.MolToMolBlock(
                    mol_to_save,
                    confId=component.conformers_mapping[conformer])
                records.append(title + body + '$$$$')
            except ValueError as error:
                # A missing conformer is skipped; anything else is an error.
                if str(error) != 'Bad Conformer Id':
                    raise CCDUtilsError(
                        'Error writing SDF file - {}'.format(error))
    except Exception:
        # Best effort: let the fallback writer produce the whole output.
        name_to_id = {}
        for conformer in conformers:
            name_to_id[conformer.name] = component.conformers_mapping[
                conformer]
        records = _to_sdf_str_fallback(mol_to_save, component.id, name_to_id)

    return "\n".join(records)
Ejemplo n.º 5
0
    def get_scaffolds(self,
                      scaffolding_method=ScaffoldingMethod.MurckoScaffold):
        """Compute deemed scaffolds for a given compound.

        Scaffolds are computed on ``self.mol_no_h``. As a side effect the
        findings are recorded in ``self._scaffolds``:

        * Murcko methods: one entry keyed by the method name; scaffold
          atoms are mapped to atom indices of ``self.mol`` through the
          ``name`` atom property.
        * BRICS: one entry per fragment keyed by
          ``'<method name>_<smiles>'``, holding the substructure matches
          of the fragment in ``self.mol_no_h``. Existing keys are kept.

        Scaffolds with an empty SMILES are not recorded. An unrecognised
        method yields an empty list.

        Args:
            scaffolding_method (ScaffoldingMethod, optional):
                Defaults to MurckoScaffold. Scaffolding method to use

        Returns:
            list[rdkit.Chem.rdchem.Mol]: Scaffolds found in the component.

        Raises:
            CCDUtilsError: If a RuntimeError or ValueError is raised while
                the scaffolds are being computed.
        """
        try:
            scaffolds = []

            if scaffolding_method == ScaffoldingMethod.MurckoScaffold:
                scaffolds = [(MurckoScaffold.GetScaffoldForMol(self.mol_no_h))]

            elif scaffolding_method == ScaffoldingMethod.MurckoGeneric:
                scaffolds = [
                    (MurckoScaffold.MakeScaffoldGeneric(self.mol_no_h))
                ]

            elif scaffolding_method == ScaffoldingMethod.Brics:
                brics_fragments = BRICS.BRICSDecompose(self.mol_no_h)
                brics_smiles = [
                    re.sub(r"(\[[0-9]*\*\])", "[H]", i)
                    for i in brics_fragments
                ]  # replace dummy atoms with H's to get matches https://sourceforge.net/p/rdkit/mailman/message/35261974/

                # RemoveHs returns a new molecule rather than modifying its
                # argument, so the result has to be kept.
                brics_mols = [
                    rdkit.Chem.RemoveHs(rdkit.Chem.MolFromSmiles(x))
                    for x in brics_smiles
                ]

                brics_hits = [
                    self.mol_no_h.GetSubstructMatches(i) for i in brics_mols
                ]

                for brics_mol, brics_hit in zip(brics_mols, brics_hits):
                    smiles = rdkit.Chem.MolToSmiles(brics_mol)
                    name = scaffolding_method.name
                    source = 'RDKit scaffolds'
                    key = f'{name}_{smiles}'
                    brics_hit = conversions.listit(brics_hit)

                    if not smiles:
                        continue

                    if key not in self._scaffolds:
                        self._scaffolds[key] = SubstructureMapping(
                            name, smiles, source, brics_hit)

                return brics_mols

            for s in scaffolds:
                scaffold_atom_names = [
                    atom.GetProp('name') for atom in s.GetAtoms()
                ]
                # Translate scaffold atoms to indices of the full molecule
                # by matching the 'name' property (first match wins).
                mapping = []
                for at_name in scaffold_atom_names:
                    idx = [
                        atom.GetIdx() for atom in self.mol.GetAtoms()
                        if atom.GetProp('name') == at_name
                    ][0]
                    mapping.append(idx)

                smiles = rdkit.Chem.MolToSmiles(s)
                name = scaffolding_method.name
                source = 'RDKit scaffolds'

                if not smiles:
                    continue

                if name in self._scaffolds:
                    self._scaffolds[name].mappings.append(mapping)
                else:
                    self._scaffolds[name] = SubstructureMapping(
                        name, smiles, source, [mapping])

            return scaffolds

        except (RuntimeError, ValueError) as e:
            # Keep the original error attached for easier debugging.
            raise CCDUtilsError(
                f'Computing scaffolds using method {scaffolding_method.name} failed.'
            ) from e
Ejemplo n.º 6
0
    def export_2d_svg(self,
                      file_name: str,
                      width: int = 500,
                      names: bool = False,
                      wedge_bonds: bool = True,
                      atom_highlight: Dict[Any, Tuple] = None,
                      bond_highlight: Dict[Tuple, Tuple] = None):
        """Save 2D depiction of the component as an SVG file. Component
        id is generated in case the image cannot be drawn.

        When ``names`` is set, the ``molFileAlias`` property of every atom
        of ``self.mol2D`` is set to the atom name as a side effect.

        Args:
            file_name (str): path to store 2d depiction
            width (int, optional): Defaults to 500. Width of a frame in
                pixels. The canvas is a square of this size.
            names (bool, optional): Defaults to False. Whether or not to
                include atom names in depiction. If atom name is not set,
                element symbol is used instead.
            wedge_bonds (bool, optional): Defaults to True. Whether or not
                the molecule should be depicted with bond wedging.
            atom_highlight (:obj:`dict` of :obj:`tuple` of :obj:`float`, optional):
                Defaults to None. Atoms to be highlighted along with
                colors in RGB, keyed by atom name or by atom index.
                e.g. {'CA': (0.5, 0.5, 0.5)} or {0: (0.5, 0.5, 0.5)}
            bond_highlight (:obj:`dict` of :obj:`tuple` of :obj:`float`, optional):
                Defaults to None. Bonds to be highlighted along with
                colors in RGB. e.g. {('CA', 'CB'): (0.5, 0.5, 0.5)} or
                {(0, 1): (0.5, 0.5, 0.5)}

        Raises:
            CCDUtilsError: If there is no bond between two named atoms.
            KeyError: If an atom name is not present in the depiction.
        """
        if self.mol2D is None:
            drawing.save_no_image(file_name, self.id, width)
            return

        drawer = Draw.rdMolDraw2D.MolDraw2DSVG(width, width)
        options = drawer.drawOptions()
        atom_mapping = {
            self._get_atom_name(a): i
            for i, a in enumerate(self.mol2D.GetAtoms())
        }

        atom_highlight = {} if atom_highlight is None else atom_highlight
        bond_highlight = {} if bond_highlight is None else bond_highlight

        if width < 201:
            options.bondLineWidth = 1

        # Atom names are translated to atom indices; keys that are already
        # indices are kept as they are instead of being discarded.
        atom_highlight = {
            (atom_mapping[k] if isinstance(k, str) else k): v
            for k, v in atom_highlight.items()
        }

        if bond_highlight:
            # NOTE(review): only name pairs are translated to bond indices;
            # other keys (e.g. atom index pairs) are forwarded unchanged -
            # confirm that drawing.draw_molecule accepts them.
            if all(
                    isinstance(i[0], str) and isinstance(i[1], str)
                    for i in bond_highlight.keys()):
                temp_highlight = {}
                for k, v in bond_highlight.items():
                    bond = self.mol2D.GetBondBetweenAtoms(
                        atom_mapping[k[0]], atom_mapping[k[1]])
                    if bond is None:
                        raise CCDUtilsError(
                            'Bond between {} and {} does not exist'.format(
                                k[0], k[1]))
                    temp_highlight[bond.GetIdx()] = v
                bond_highlight = temp_highlight

        if names:
            for i, a in enumerate(self.mol2D.GetAtoms()):
                atom_name = self._get_atom_name(a)
                options.atomLabels[i] = atom_name
                a.SetProp('molFileAlias', atom_name)

        drawing.draw_molecule(self.mol2D, drawer, file_name, wedge_bonds,
                              atom_highlight, bond_highlight)