def test_svg_annotation(component: Component, tmpdir_factory):
    """Export the 2D annotation of a component to JSON and validate its content.

    The component is depicted with the module-level ``depictions`` object,
    the annotation is written to a temporary directory and the resulting JSON
    is checked for the identifier, resolution, atoms and bonds.

    Args:
        component (Component): Component under test.
        tmpdir_factory: pytest fixture used to create the temporary directory.
    """
    # Components without atoms are skipped silently.
    if not component.atoms_ids:
        return

    work_dir = tmpdir_factory.mktemp('svg_json_test')
    json_path = os.path.join(work_dir, f'{component.id}.json')

    component.compute_2d(depictions)
    component.export_2d_annotation(json_path)

    # The export has to leave a non-empty file behind.
    assert os.path.isfile(json_path)
    assert os.path.getsize(json_path) > 0

    with open(json_path, 'r') as handle:
        annotation = json.load(handle)

    assert annotation['ccd_id'] == component.id

    resolution = annotation['resolution']
    for axis in ('x', 'y'):
        assert resolution[axis] >= 0

    atoms = annotation['atoms']
    atom_names = [atom['name'] for atom in atoms]
    assert len(atoms) == component.mol_no_h.GetNumAtoms()

    bonds = annotation['bonds']
    assert len(bonds) >= component.mol_no_h.GetNumBonds()

    # At least one atom carries a label (not every atom has one).
    assert any(atom['label'] for atom in atoms)
    # Every atom has a non-empty name.
    assert all(atom['name'] for atom in atoms)
    # Both ends of every bond refer to an atom listed in the annotation.
    assert all(
        bond['bgn'] in atom_names and bond['end'] in atom_names
        for bond in bonds
    )
    # Every bond comes with coordinates ...
    assert all(bond['coords'] for bond in bonds)
    # ... and with styling information.
    assert all(bond['style'] for bond in bonds)
def test_svg_annotation(component: Component, tmpdir_factory):
    """Write the 2D depiction annotation as JSON and check what was written.

    The depiction is computed with the module-level ``depictions`` object.
    The exported file must exist, be non-empty, and describe the component
    identifier, a non-negative resolution, named atoms and fully described
    bonds.

    Args:
        component (Component): Component under test.
        tmpdir_factory: pytest fixture used to create the temporary directory.
    """
    # Nothing is checked for components that have no atoms.
    if not component.atoms_ids:
        return

    target_dir = tmpdir_factory.mktemp("svg_json_test")
    target = os.path.join(target_dir, f"{component.id}.json")

    component.compute_2d(depictions)
    component.export_2d_annotation(target)

    assert os.path.isfile(target)
    assert os.path.getsize(target) > 0

    with open(target, "r") as stream:
        payload = json.load(stream)

    assert payload["ccd_id"] == component.id

    resolution = payload["resolution"]
    for axis in ("x", "y"):
        assert resolution[axis] >= 0

    atoms = payload["atoms"]
    atom_names = [atom["name"] for atom in atoms]
    assert len(atoms) == component.mol_no_h.GetNumAtoms()

    bonds = payload["bonds"]
    assert len(bonds) >= component.mol_no_h.GetNumBonds()

    # Every atom has a non-empty name.
    assert all(atom["name"] for atom in atoms)
    # Both ends of every bond refer to an atom listed in the annotation.
    assert all(
        bond["bgn"] in atom_names and bond["end"] in atom_names
        for bond in bonds
    )
    # Every bond has coordinates and a style.
    assert all(bond["coords"] for bond in bonds)
    assert all(bond["style"] for bond in bonds)
def _compute_component_scaffolds(self, component: Component):
    """Compute the scaffolds of a component and log the outcome.

    A ``CCDUtilsError`` raised by ``component.get_scaffolds()`` is logged as
    an error and not propagated. On success the number of scaffolds found is
    logged at debug level.

    Args:
        component (Component): Component to be processed.
    """
    try:
        component.get_scaffolds()
    except CCDUtilsError as err:
        logging.error(str(err))
    else:
        logging.debug(f"{len(component.scaffolds)} scaffold(s) were found.")
def _parse_pdb_mmcif(cif_dict):
    """Build the internal representation of a molecule from mmCIF data.

    Args:
        cif_dict (dict): mmCIF data from which the atom, bond, identifier,
            descriptor and `_chem_comp` categories are read.

    Returns:
        CCDReaderResult: Parsed component together with the warnings and
            errors collected along the way.
    """
    warnings = []
    errors = []
    mol = rdkit.Chem.RWMol()

    # Preprocess the categories in a fixed order; each call may append to
    # `warnings`, so the order determines the order of the messages.
    category_names = (
        '_chem_comp_atom',
        '_chem_comp_bond',
        '_pdbx_chem_comp_identifier',
        '_pdbx_chem_comp_descriptor',
        '_chem_comp',
    )
    atoms_cat, bonds_cat, identifiers_cat, descriptors_cat, properties_cat = [
        _preprocess_pdb_parser_output(cif_dict, name, warnings)
        for name in category_names
    ]

    # Populate the editable molecule step by step.
    _parse_pdb_atoms(mol, atoms_cat)
    _parse_pdb_conformers(mol, atoms_cat)
    _parse_pdb_bonds(mol, bonds_cat, atoms_cat, errors)
    _handle_implicit_hydrogens(mol)

    descriptors = _parse_pdb_descriptors(descriptors_cat, 'descriptor')
    descriptors += _parse_pdb_descriptors(identifiers_cat, 'identifier')
    properties = _parse_pdb_properties(properties_cat)

    component = Component(mol.GetMol(), cif_dict, properties, descriptors)

    return CCDReaderResult(warnings=warnings, errors=errors, component=component)
def to_pdb_ccd_cif_file(path, component: Component, remove_hs=True):
    """Write the component to a file in the PDB CCD CIF format.

    The component's CIF dictionary is deep-copied and the copy is enriched
    with software info, 2D depiction, fragments and scaffolds, RDKit
    properties, UniChem mapping and the RDKit conformer before being written.

    If `component.ccd_cif_dict` is not a dict, it is first created with
    `_to_pdb_ccd_cif_dict` and stored on the component.

    Args:
        path (str): Path to save cif file.
        component (Component): Component to be exported.
        remove_hs (bool, optional): When true, atoms whose `type_symbol` is
            "H" and the bonds that reference them are left out of the
            written file. Defaults to True.
    """
    if not isinstance(component.ccd_cif_dict, dict):
        component.ccd_cif_dict = _to_pdb_ccd_cif_dict(component)

    # Work on a copy so the component's own dictionary is not enriched or
    # stripped of hydrogens.
    cif_copy = copy.deepcopy(component.ccd_cif_dict)

    _add_sw_info_cif(cif_copy)
    _add_2d_depiction_cif(component, cif_copy)
    _add_fragments_and_scaffolds_cif(component, cif_copy)
    _add_rdkit_properties_cif(component, cif_copy)
    _add_unichem_mapping_cif(component, cif_copy)
    _add_rdkit_conformer_cif(component, cif_copy, remove_hs)

    if remove_hs:
        _strip_hydrogens_cif(cif_copy)

    cfd = mmcif.CifFileWriter(path)
    cfd.write({component.id: cif_copy})


def _strip_hydrogens_cif(cif_dict):
    """Remove hydrogen atoms and the bonds involving them, in place.

    Args:
        cif_dict (dict): CIF dictionary with a `_chem_comp_atom` category
            and, optionally, a `_chem_comp_bond` category.
    """
    atoms = cif_dict['_chem_comp_atom']

    # Sets keep the membership tests below O(1) per row.
    h_indices = {
        i for i, symbol in enumerate(atoms['type_symbol']) if symbol == "H"
    }
    h_names = {atoms['atom_id'][i] for i in h_indices}

    # scrap hydrogen atoms
    for key in atoms:
        atoms[key] = [
            value for i, value in enumerate(atoms[key]) if i not in h_indices
        ]

    # A component without bonds (e.g. a single atom) may have no bond
    # category at all; there is nothing to strip in that case.
    bonds = cif_dict.get('_chem_comp_bond')
    if not bonds:
        return

    hb_indices = {
        i
        for key in ('atom_id_1', 'atom_id_2')
        for i, name in enumerate(bonds[key])
        if name in h_names
    }

    # scrap bonds to hydrogen atoms
    for key in bonds:
        bonds[key] = [
            value for i, value in enumerate(bonds[key]) if i not in hb_indices
        ]
def _generate_ideal_structure(self, component: Component):
    """Compute a 3D conformer for the component and log diagnostics.

    `component.compute_3d()` is always called. Afterwards a debug message is
    logged if the ideal conformer is reported as degenerated, and another
    one if the computation returned a falsy result.

    Args:
        component (Component): Component to be processed.

    Returns:
        The value returned by `component.compute_3d()`; falsy when the 3D
        conformation could not be generated.
    """
    success = component.compute_3d()

    # NOTE(review): the two checks are treated as independent of each other;
    # confirm against the upstream layout.
    ideal_is_degenerated = component.has_degenerated_conformer(ConformerType.Ideal)
    if ideal_is_degenerated:
        logging.debug("has degenerated ideal coordinates.")

    if not success:
        logging.debug("error in generating 3D conformation.")

    return success
def to_sdf_str(
    component: Component,
    remove_hs: bool = True,
    conf_type: ConformerType = ConformerType.Ideal,
):
    """Serialize the component into the SDF format.

    Each exported conformer contributes a title line, its mol block and
    the `$$$$` record separator.

    Args:
        component (Component): Component to be exported.
        remove_hs (bool, optional): Defaults to True.
        conf_type (ConformerType, optional): Conformer to export;
            `ConformerType.AllConformers` exports the model, ideal and
            computed conformers. Defaults to ConformerType.Ideal.

    Raises:
        CCDUtilsError: In case the structure could not be exported.
            NOTE(review): the error raised inside the loop appears to be
            intercepted by the enclosing `except Exception` (assuming it
            derives from `Exception`), which switches to the fallback
            writer instead - confirm.

    Returns:
        str: String representation of the component in the SDF format
    """
    mol_to_save, _, conf_type = _prepate_structure(component, remove_hs, conf_type)

    if conf_type == ConformerType.AllConformers:
        conformers = [
            ConformerType.Model,
            ConformerType.Ideal,
            ConformerType.Computed,
        ]
    else:
        conformers = [conf_type]

    mol_block = []

    try:
        for conformer in conformers:
            try:
                conf_id = (
                    component.get_conformer(conformer).GetId()
                    if conformer != ConformerType.AllConformers
                    else -1
                )
                mol_block.extend(
                    [
                        f"{component.id} - {conformer.name} conformer",
                        rdkit.Chem.MolToMolBlock(mol_to_save, confId=conf_id).strip(),
                        "$$$$\n",
                    ]
                )
            except ValueError as err:
                # A missing conformer is skipped; any other ValueError is
                # reported as an export problem.
                if str(err) != "Bad Conformer Id":
                    raise CCDUtilsError(f"Error writing SDF file - {err}")
    except Exception:
        # Best effort: on any failure the fallback writer replaces whatever
        # has been collected so far.
        mol_block = _to_sdf_str_fallback(mol_to_save, component.id, conformers)

    return "\n".join(mol_block)
def _search_fragment_library(self, component: Component):
    """Search the fragment library for hits in the component.

    The number of matches is logged at debug level when there are any.

    Args:
        component (Component): Component to be processed.
    """
    hits = component.library_search(self.fragment_library)

    if not hits:
        return

    logging.debug(
        f"{len(hits)} matches found in the library `{self.fragment_library.name}`."
    )
def _parse_pdb_mmcif(cif, sanitize=True):
    """Build the internal representation of a molecule from mmCIF data.

    Args:
        cif (dict): mmcif dictionary
        sanitize (bool): Whether or not the rdkit component should
            be sanitized. Defaults to True.

    Returns:
        CCDReaderResult: Parsed component together with the collected
            warnings and errors and the sanitization outcome.
    """
    # Preprocess every known category, keeping only the non-empty
    # warnings in the order the categories are listed.
    preprocessing_output = (
        cif_tools.preprocess_cif_category(cif, category)
        for category in preprocessable_categories
    )
    warnings = [message for message in preprocessing_output if message]
    errors = []

    mol = rdkit.Chem.RWMol()

    _parse_pdb_atoms(mol, cif)
    _parse_pdb_conformers(mol, cif)
    _parse_pdb_bonds(mol, cif, errors)
    _handle_implicit_hydrogens(mol)

    # Without sanitization the result is reported as not sanitized.
    sanitized = mol_tools.sanitize(mol) if sanitize else False

    descriptors = _parse_pdb_descriptors(cif, "_pdbx_chem_comp_descriptor", "descriptor")
    descriptors += _parse_pdb_descriptors(cif, "_pdbx_chem_comp_identifier", "identifier")
    properties = _parse_pdb_properties(cif["_chem_comp"])

    component = Component(mol.GetMol(), cif, properties, descriptors)

    return CCDReaderResult(
        warnings=warnings,
        errors=errors,
        component=component,
        sanitized=sanitized,
    )
def test_plain_cif_write(component: Component, tmpdir, rem_hs):
    """Write a component without its source CIF dictionary and read it back.

    The component's `ccd_cif_dict` is cleared before writing. The file read
    back must contain the component id and every expected category.

    Args:
        component (Component): Component under test.
        tmpdir: pytest fixture providing a temporary directory.
        rem_hs: Passed to the writer as `remove_hs`.
    """
    cif_path = tmpdir.join(f"{component.id}.cif")
    expected = must_have_categories.copy()

    component.ccd_cif_dict = None
    ccd_writer.write_molecule(str(cif_path), component, remove_hs=rem_hs)
    parsed = reader.read(cif_path)

    # NA is a single atom, and D3O has a single heavy atom once hydrogens
    # are removed. In both cases the entry at index 2 is not expected
    # (presumably "_chem_comp_bond"; the list is defined elsewhere).
    without_bonds = component.id == "NA" or (component.id == "D3O" and rem_hs)
    if without_bonds:
        expected.pop(2)

    assert parsed
    assert component.id in parsed

    written = parsed[component.id]
    missing = [category for category in expected if category not in written]
    assert not missing
def _generate_depictions(self, component: Component):
    """Generate 2D depictions and the depiction annotation for the component.

    SVG files are exported for widths 100, 200, 300, 400 and 500, each with
    and without atom names, followed by one JSON annotation file. All files
    go to `<self.output_dir>/<first character of id>/<id>`.

    Args:
        component (Component): Component to be depicted.
    """
    parent_dir = os.path.join(self.output_dir, component.id[0], component.id)
    outcome = component.compute_2d(self.depictions)

    failed = outcome.source == DepictionSource.Failed
    if not failed:
        if outcome.score > 0.99:
            self.logger.debug(
                'collision free image could not be generated.')
        self.logger.debug(
            f'2D generated using {outcome.source.name} with score {outcome.score}.'
        )
    else:
        self.logger.debug('failed to generate 2D image.')

    # NOTE(review): the exports below are treated as running regardless of
    # the depiction outcome; confirm against the upstream layout.
    wedge_bonds = outcome.template_name != 'cube'

    for width in range(100, 600, 100):
        plain_svg = os.path.join(parent_dir, f'{component.id}_{width}.svg')
        named_svg = os.path.join(parent_dir, f'{component.id}_{width}_names.svg')

        component.export_2d_svg(plain_svg, width=width, wedge_bonds=wedge_bonds)
        component.export_2d_svg(named_svg,
                                width=width,
                                names=True,
                                wedge_bonds=wedge_bonds)

    annotation_json = os.path.join(parent_dir, f'{component.id}_annotation.json')
    component.export_2d_annotation(annotation_json, wedge_bonds=wedge_bonds)
def _generate_depictions(self, component: Component, out_dir: str):
    """Generate 2D depictions and the depiction annotation in JSON format.

    SVG files are exported for widths 100, 200, 300, 400 and 500, each with
    and without atom names, followed by one JSON annotation file.

    Args:
        component (Component): Component to be depicted.
        out_dir (str): Where the depictions should be stored.
    """
    outcome = component.compute_2d(self.depictions)

    failed = outcome.source == DepictionSource.Failed
    if not failed:
        if outcome.score > 0.99:
            logging.debug("collision free image could not be generated.")
        logging.debug(
            f"2D generated using {outcome.source.name} with score {outcome.score}."
        )
    else:
        logging.debug("failed to generate 2D image.")

    # NOTE(review): the exports below are treated as running regardless of
    # the depiction outcome; confirm against the upstream layout.
    wedge_bonds = outcome.template_name != "cube"

    for width in range(100, 600, 100):
        plain_svg = os.path.join(out_dir, f"{component.id}_{width}.svg")
        named_svg = os.path.join(out_dir, f"{component.id}_{width}_names.svg")

        component.export_2d_svg(plain_svg, width=width, wedge_bonds=wedge_bonds)
        component.export_2d_svg(
            named_svg,
            width=width,
            names=True,
            wedge_bonds=wedge_bonds,
        )

    annotation_json = os.path.join(out_dir, f"{component.id}_annotation.json")
    component.export_2d_annotation(annotation_json, wedge_bonds=wedge_bonds)