def _initialize_from_topology(self):
    """
    It initializes a SolventWrapper object using a peleffy's
    molecular Topology.

    The solvent-wide constants are copied from the GBSA parameter
    handler of the force field file (self._ff_file). Then, the molecule
    of each topology is parameterized with that same force field file
    and its GBSA radii and scales are stored under the index of the
    topology.
    """
    Logger().info(' - Generating solvent parameters')

    from peleffy.utils.toolkits import OpenForceFieldToolkitWrapper

    toolkit = OpenForceFieldToolkitWrapper()
    gbsa_handler = toolkit.get_parameter_handler_from_forcefield(
        'GBSA', self._ff_file)

    # Copy the solvent-wide constants from the handler into the
    # private attributes that share their name
    for attribute in ('solvent_dielectric', 'solute_dielectric',
                      'surface_area_penalty', 'solvent_radius'):
        setattr(self, '_' + attribute, getattr(gbsa_handler, attribute))

    from peleffy.forcefield import OpenForceField

    openff = OpenForceField(self._ff_file)

    # A 'dummy' charge method is requested when parameterizing
    for position, current_topology in enumerate(self.topologies):
        gbsa_parameters = openff.parameterize(current_topology.molecule,
                                              charge_method='dummy')
        self._radii[position] = gbsa_parameters['GBSA_radii']
        self._scales[position] = gbsa_parameters['GBSA_scales']
def calculate(self):
    """
    It calculates the conformation library from the BCE output.

    The work is delegated to self._calculate_all_conformations() after
    notifying the logger.
    """
    Logger().info(' - Calculating conformation library')
    self._calculate_all_conformations()
def _initialize_from_smiles(self, smiles):
    """
    It initializes a molecule from a SMILES tag.

    Both the RDKit and the Open Force Field representations of the
    molecule are generated. When the molecule has no name yet, the
    SMILES tag itself is used as its name.

    Parameters
    ----------
    smiles : str
        The SMILES tag to construct the molecule structure with
    """
    log = Logger()
    log.info(' - Initializing molecule from a SMILES tag')
    self._initialize()

    log.info(' - Loading molecule from RDKit')
    rdkit_wrapper = RDKitToolkitWrapper()
    self._rdkit_molecule = rdkit_wrapper.from_smiles(
        smiles, self.hydrogens_are_explicit)

    # TODO not sure if stereochemistry assignment from 3D is still necessary
    # RDKit must generate stereochemistry specifically from 3D coords
    # rdkit_toolkit.assign_stereochemistry_from_3D(self)

    # An unnamed molecule takes the SMILES tag as its name
    if self.name == '':
        log.info(" - Setting molecule name to '{}'".format(smiles))
        self.set_name(smiles)

    log.info(' - Representing molecule with the Open Force Field '
             + 'Toolkit')
    off_wrapper = OpenForceFieldToolkitWrapper()
    self._off_molecule = off_wrapper.from_rdkit(
        self, self.hydrogens_are_explicit)
def test_peleffy_main(self):
    """
    It checks the main function of peleffy.

    The main function is run with the default, silent and debug
    settings and, after each run, all the handlers of the peleffy
    logger must be set to the expected logging level.
    """
    from peleffy.main import parse_args, main
    from peleffy.utils import Logger
    import logging

    ligand_path = get_data_file_path('ligands/benzene.pdb')

    # Extra command-line flags paired with the handler level that they
    # must produce: default, silent and debug settings, in this order
    scenarios = (
        ([], logging.INFO),
        (['--silent'], logging.CRITICAL),
        (['--debug'], logging.DEBUG),
    )

    with tempfile.TemporaryDirectory() as tmpdir, temporary_cd(tmpdir):
        for extra_flags, expected_level in scenarios:
            main(parse_args([ligand_path] + extra_flags))

            for handler in Logger()._logger.handlers:
                assert handler.level == expected_level
def __init__(self, topologies):
    """
    It initializes an OBC1 object. A warning is logged on construction
    since OBC1 is not implemented in PELE.

    Parameters
    ----------
    topologies : a Topology object or list[Topology object]
        The molecular topology representation to write as a
        Impact template
    """
    # Not implemented in PELE
    Logger().warning('OBC1 is not implemented in PELE')

    super().__init__(topologies)
def test_pdb_checkup(self):
    """
    It tests the safety check function for PDB files.

    A valid PDB must load without complaints, three malformed PDB
    files must raise an exception and a PDB file without connectivity
    must only send a warning to the logger.
    """
    valid_pdb = get_data_file_path('ligands/ethylene.pdb')
    # In this order: repeated atom names, mismatching residue ids and
    # mismatching residue names
    malformed_pdbs = (
        get_data_file_path('tests/ethylene_error1.pdb'),
        get_data_file_path('tests/ethylene_error2.pdb'),
        get_data_file_path('tests/ethylene_error3.pdb'),
    )
    no_connectivity_pdb = get_data_file_path('tests/ethylene_error4.pdb')

    # This should work without any complain
    _ = Molecule(valid_pdb)

    # Each malformed file must be rejected
    for malformed_pdb in malformed_pdbs:
        with pytest.raises(Exception):
            _ = Molecule(malformed_pdb)

    # Check warning message in the logger when connectivity is missing
    import io
    from peleffy.utils import Logger
    import logging
    from importlib import reload

    # Hard reset of the logging library before grabbing the logger
    logging.shutdown()
    reload(logging)

    log = Logger()
    log.set_level('WARNING')

    expected_output = ("Warning: input PDB has no information "
                       "about the connectivity and this could result in "
                       "an unexpected bond assignment\n")

    # Catch logger messages to string buffer
    with io.StringIO() as buf:
        log_handler = logging.StreamHandler(buf)
        log._logger.handlers = list()
        log._logger.addHandler(log_handler)

        _ = Molecule(no_connectivity_pdb)

        assert buf.getvalue() == expected_output
def _build(self): """The topology builder.""" # In case the molecule has not been initialized if (self.molecule.rdkit_molecule is None or len(list(self.parameters.atom_iterator)) == 0): logger = Logger() logger.warning('Warning: the input molecule has not been ' + ' initialized and its topology will be empty') return self._build_atoms() self._build_bonds() self._build_angles() self._build_propers() self._build_impropers()
def get_by_name(self, forcefield_name):
    """
    Given a forcefield name, it returns the corresponding
    force field class.

    The name is compared in upper case against the known OPLS2005
    names and, as supplied, against the known OpenFF names.

    Parameters
    ----------
    forcefield_name : str
        The name of the requested forcefield

    Returns
    -------
    forcefield : a peleffy.forcefield.forcefield.ForceField
        The force field that corresponds to the supplied name

    Raises
    ------
    ValueError
        If the supplied forcefield_name is unknown

    Examples
    --------

    Use the force field selector to select a force field by name

    >>> from peleffy.forcefield import ForceFieldSelector

    >>> selector = ForceFieldSelector()
    >>> openff = selector.get_by_name('openff_unconstrained-1.2.1.offxml')

    """
    from peleffy.utils import Logger

    Logger().info(" - Loading '{}'".format(forcefield_name))

    from .forcefield import (OpenForceField, OPLS2005ForceField)

    known_names = self._FF_TYPES

    if forcefield_name.upper() in known_names['OPLS2005']:
        return OPLS2005ForceField()

    if forcefield_name in known_names['OpenFF']:
        return OpenForceField(forcefield_name=forcefield_name)

    raise ValueError('Invalid force field name')
def __init__(self, dihedral_atom_indexes, molecule):
    """
    It initializes a DihedralBenchmark object.

    The peleffy logger is set to the WARNING level and the supplied
    molecule is asserted to be parameterized before it is stored.

    Parameters
    ----------
    dihedral_atom_indexes : tuple[int]
        The indexes of atoms involved in the dihedral
    molecule : an peleffy.topology.Molecule
        The peleffy's Molecule object
    """
    # Hide peleffy output
    from peleffy.utils import Logger
    Logger().set_level('WARNING')

    molecule.assert_parameterized()

    self._molecule = molecule
    self._forcefield = molecule.forcefield
    self._atom_indexes = dihedral_atom_indexes
def _build_rotamers(self):
    """
    It builds the rotamers of the molecule.

    Nothing is done unless both the Open Force Field and the RDKit
    representations are available. The molecular graph is built with a
    constrained core when core constraints are defined; otherwise, a
    regular molecular graph is used.
    """
    log = Logger()

    if not (self.off_molecule and self.rdkit_molecule):
        return

    log.info(' - Generating rotamer library')

    if len(self.core_constraints) == 0:
        self._graph = MolecularGraph(self)
        log.info(' - Core set to the center of the molecule')
    else:
        self._graph = MolecularGraphWithConstrainedCore(
            self, self.core_constraints)

        if len(self.core_constraints) == 1:
            log.info(' - Core forced to contain atom: '
                     + self._graph.constraint_names[0])
        else:
            stripped_names = (name.strip()
                              for name in self._graph.constraint_names)
            log.info(' - Core forced to contain atoms: '
                     + ', '.join(stripped_names))

    self._rotamers = self._graph.get_rotamers()
def __init__(self, PELE_exec, PELE_src, n_proc=1):
    """
    It initializes a MultiMinimizer object. The peleffy logger is set
    to the WARNING level on construction.

    Parameters
    ----------
    PELE_exec : str
        Path to the PELE executable
    PELE_src : str
        Path to PELE source folder
    n_proc : int
        The number of processors to employ to gather and parse data
    """
    # Supress INFO messages from peleffy
    from peleffy.utils import Logger
    Logger().set_level('WARNING')

    self._PELE_exec, self._PELE_src = PELE_exec, PELE_src
    self._output_path = None
    self.n_proc = n_proc
def _pdb_checkup(self, path): """ Safety check for PDB files in order to properly handle exceptions related with its format prior running the parser. Parameters ---------- path : str The path to a PDB with the molecule structure """ # Parse PDB file atom_id, res_name, res_id = ([] for i in range(3)) connectivity = False with open(path) as pdb_file: for line in pdb_file: if line.startswith('ATOM') or line.startswith('HETATM'): atom_id.append(line[12:16]) res_name.append(line[17:20]) res_id.append(line[22:26]) if line.startswith('CONECT'): connectivity = True # Handle exceptions related with the PDB file format if not res_id[:-1] == res_id[1:]: raise Exception( 'A single ligand with immutable residue ids is expected') if not res_name[:-1] == res_name[1:]: raise Exception( 'A single ligand with immutable residue names is expected') if not len(atom_id) == len(set(atom_id)): raise Exception('Ligand in input PDB has no unique atom names') if not connectivity and self.connectivity_template is None: log = Logger() log.warning( "Warning: input PDB has no information about the " + "connectivity and this could result in an unexpected " + "bond assignment")
def main(args):
    """
    It reads the command-line arguments and runs peleffy.

    The root logger of the logging library is set to the ERROR level
    and the peleffy logger is set to CRITICAL, DEBUG or INFO depending
    on the silent and debug arguments (silent takes precedence).

    Parameters
    ----------
    args : argparse.Namespace
        It contains the command-line arguments that are supplied by
        the user

    Examples
    --------

    From the command-line:

    >>> python main.py molecule.pdb -f openff_unconstrained-1.2.0.offxml -r 30 -o output_path/ --with_solvent --as_datalocal -c gasteiger

    """
    exclude_terminal_rotamers = not args.include_terminal_rotamers

    # Supress OpenForceField toolkit warnings
    import logging
    logging.getLogger().setLevel(logging.ERROR)

    # Set peleffy logger to the corresponding level
    logger = Logger()
    if args.silent:
        level = 'CRITICAL'
    elif args.debug:
        level = 'DEBUG'
    else:
        level = 'INFO'
    logger.set_level(level)

    run_peleffy(
        pdb_file=args.pdb_file,
        forcefield_name=args.forcefield,
        resolution=args.resolution,
        charge_method=args.charge_method,
        exclude_terminal_rotamers=exclude_terminal_rotamers,
        output=args.output,
        with_solvent=args.with_solvent,
        as_datalocal=args.as_datalocal,
        chain=args.chain,
        conformation_path=args.conformations_info_path,
        charges_from_file=args.charges_from_file)
def _build_atoms(self):
    """
    It builds the atoms of the molecule.

    One Atom is created per entry of the parameters' atom iterator,
    using the coordinates supplied by the RDKit toolkit wrapper. Each
    atom is then flagged as core or branch according to the core nodes
    of the molecular graph and, finally, the parent of each atom is
    assigned from the molecular graph, starting from the first core
    atom that is found.

    Problems (no core atom, unexpected number of parents) are reported
    through the logger as errors; no exception is raised here for them.
    """
    from peleffy.utils import Logger
    logger = Logger()

    coords = RDKitToolkitWrapper().get_coordinates(self.molecule)

    for index, (atom_name, atom_type, sigma, epsilon, charge,
                SGB_radius, vdW_radius, gamma, alpha) \
            in enumerate(self.parameters.atom_iterator):
        atom = Atom(index=index,
                    PDB_name=atom_name,
                    OPLS_type=atom_type,
                    x=coords[index][0],
                    y=coords[index][1],
                    z=coords[index][2],
                    sigma=sigma,
                    epsilon=epsilon,
                    charge=charge,
                    born_radius=SGB_radius,
                    SASA_radius=vdW_radius,
                    nonpolar_gamma=gamma,
                    nonpolar_alpha=alpha)
        self.add_atom(atom)

    # The core nodes do not change while atoms are being flagged, so
    # they are retrieved only once
    core_nodes = self.molecule.graph.core_nodes
    for atom in self.atoms:
        if atom.index in core_nodes:
            atom.set_as_core()
        else:
            atom.set_as_branch()

    # Start from an atom from the core
    absolute_parent = next(
        (atom.index for atom in self.atoms if atom.core), None)
    if absolute_parent is None:
        logger.error('Error: no core atom found in molecule '
                     + '{}'.format(self.molecule.name))

    # Get parent indexes from the molecular graph
    parent_idxs = self.molecule.graph.get_parents(absolute_parent)

    # Assert parent_idxs has right length. This message used to be a
    # copy of the "no core atom" one, which misreported the problem
    if len(parent_idxs) != len(self.atoms):
        logger.error('Error: the number of parents does not match '
                     + 'the number of atoms in molecule '
                     + '{}'.format(self.molecule.name))

    for atom in self.atoms:
        parent_idx = parent_idxs[atom.index]
        if parent_idx is not None:
            atom.set_parent(self.atoms[parent_idx])
def test_pdb_fixer_logger_messages(self):
    """
    It checks the logger messages of the PDB fixer.

    A PDB file is passed to the PDB fixer of an empty molecule and the
    messages sent to the logger must be the warning about the inferred
    elements followed by the error about the failed fixing.
    """
    from peleffy.utils import Logger
    import io

    molecule = Molecule(fix_pdb=True)

    # Check logger messages
    broken_pdb = get_data_file_path('tests/ligSUV_no_elements3.pdb')

    import logging

    # Force a hard reset of logging library and the logger it manages
    from importlib import reload
    logging.shutdown()
    reload(logging)

    # Initiate logger (it is left at its default level)
    log = Logger()

    expected_output = ("Warning: input PDB has no information "
                       "about atom elements and they were inferred from "
                       "atom names. "
                       "Please, verify that the resulting elements are "
                       "correct\n"
                       "Error: PDB could not be fixed\n")

    # Catch logger messages to string buffer
    with io.StringIO() as buf:
        # The custom handler becomes the only handler of the logger
        log_handler = logging.StreamHandler(buf)
        log._logger.handlers = list()
        log._logger.addHandler(log_handler)

        _ = molecule._read_and_fix_pdb(broken_pdb)

        assert buf.getvalue() == expected_output
def __init__(self, pele_exec, pele_src, pele_license,
             ploprottemp_src, schrodinger_src,
             charge_method='am1bcc', solvent='OBC',
             opls_nonbonding=False, opls_bonds_angles=False,
             n_proc=1, forcefield_name=None, forcefield=None):
    """
    It initializes a SolventBenchmark object.

    Besides storing the supplied settings, it sets the peleffy logger
    to the WARNING level and the root logger of the logging library to
    the ERROR level.

    Parameters
    ----------
    pele_exec : str
        Path to the PELE executable
    pele_src : str
        Path to PELE source folder
    pele_license : str
        Path to PELE license directory
    ploprottemp_src : str
        Path to PlopRotTemp source code
    schrodinger_src : str
        Path to Schrodinger source code
    charge_method : str
        The method to calculate partial charges
    solvent : str
        The solvent model to employ
    opls_nonbonding : bool
        Whether to use OPLS2005 to parameterize nonbonding terms or not
    opls_bonds_angles : bool
        Whether to use OPLS2005 to parameterize bonds and angles or not
    n_proc : int
        Number of parallel computing processors to employ. Default is 1
    forcefield_name : str
        The force field name to employ. Default is None
    forcefield : an peleffy.forcefield._BaseForceField
        The forcefield representation to employ. Default is None
    """
    # External software locations
    self.pele_exec = pele_exec
    self.pele_src = pele_src
    self.pele_license = pele_license
    self.ploprottemp_src = ploprottemp_src
    self.schrodinger_src = schrodinger_src

    # Parameterization settings
    self.charge_method = charge_method
    self.solvent = solvent
    self.opls_nonbonding = opls_nonbonding
    self.opls_bonds_angles = opls_bonds_angles
    self.forcefield_name = forcefield_name
    self.forcefield = forcefield

    # Execution settings and results container
    self._n_proc = n_proc
    self._results = dict()

    # Deactivate peleffy output
    from peleffy.utils import Logger
    Logger().set_level('WARNING')

    # Supress OpenForceField toolkit warnings
    import logging
    logging.getLogger().setLevel(logging.ERROR)
def test_logger_levels(self):
    """
    It checks the correct behaviour of the different log levels.

    One message per level is pushed to the logger, first without
    changing its level and then after setting it to DEBUG, INFO,
    WARNING, ERROR and CRITICAL. Each time, only the messages at or
    above the active level must reach the output.
    """
    import logging

    # Force a hard reset of logging library and the logger it manages
    from importlib import reload
    logging.shutdown()
    reload(logging)

    # Initiate logger
    log = Logger()

    # Logger method and message pushed with it, from the least to the
    # most severe level
    messages = (('debug', 'Debug message'),
                ('info', 'Info message'),
                ('warning', 'Warn message'),
                ('error', 'Error message'),
                ('critical', 'Critical message'))

    def capture_output(level=None):
        """
        It pushes one message per level and returns what the logger
        wrote. The logger level is only changed when a level is given.
        """
        # Catch logger messages to string buffer
        with io.StringIO() as buf:
            # Add custom handler to logger, replacing any previous one
            log_handler = logging.StreamHandler(buf)
            log._logger.handlers = list()
            log._logger.addHandler(log_handler)

            if level is not None:
                log.set_level(level)

            # Push messages
            for method_name, text in messages:
                getattr(log, method_name)(text)

            # Get string from buffer
            return buf.getvalue()

    # Level to set (None keeps the default one, which is INFO) paired
    # with the index of the first message that is expected to show up
    scenarios = ((None, 1),
                 ('DEBUG', 0),
                 ('INFO', 1),
                 ('WARNING', 2),
                 ('ERROR', 3),
                 ('CRITICAL', 4))

    for level, first_visible in scenarios:
        expected = ''.join(text + '\n'
                           for _, text in messages[first_visible:])
        output = capture_output(level)

        assert output == expected, \
            'Unexpected logger message at standard output'
def _read_and_fix_pdb(self, path):
    """
    It reads the input PDB file returns the corresponding PDB block.
    It also applies some modifications, in case it requires some
    fixing prior running the parser.

    The only fix that is applied is filling in the element symbol of
    ATOM and HETATM records that lack it. The element is inferred from
    the atom name by dropping its digits and blank spaces. If the
    inferred identifier does not have 1 or 2 characters, the fixing is
    aborted, an error is logged and the original content of the file
    is returned instead.

    Parameters
    ----------
    path : str
        The path to a PDB with the molecule structure

    Returns
    -------
    pdb_block : str
        The corresponding PDB block, with applied fixes if required
    """
    log = Logger()

    # Skip PDB fixing if it has been deactivated
    if not self.fix_pdb:
        with open(path) as pdb_file:
            return pdb_file.read()

    # Fix PDB
    missing_element = False
    any_fail = False
    fixed_lines = []

    with open(path) as pdb_file:
        for line in pdb_file:
            # The element symbol belongs to indexes 76-77. It is
            # missing when the line is too short to hold it or when
            # those two positions are blank. The slice is stripped
            # because comparing it with a single blank space can
            # never match a two-character slice
            if (line.startswith(('ATOM', 'HETATM'))
                    and (len(line) < 78 or not line[76:78].strip())):
                missing_element = True

                # Try to infer element from atom name
                inferred_element = ''.join(
                    c for c in line[12:16]
                    if not c.isdigit() and c != ' ')

                # Format properly the element identifier
                if len(inferred_element) == 1:
                    inferred_element = inferred_element.upper()
                elif len(inferred_element) == 2:
                    inferred_element = inferred_element[0].upper() + \
                        inferred_element[1].lower()
                else:
                    # We were expecting an element identifier of 1 or
                    # 2 chars
                    any_fail = True
                    break

                # Remove line breaks, if any, and fill a short line
                # with white spaces up to the end of the element field
                line = line.strip().ljust(78)

                # Add element to line (right-justified). Whatever
                # follows the element field is kept untouched
                line = line[:76] + '{:>2s}'.format(inferred_element) \
                    + line[78:] + '\n'

            fixed_lines.append(line)

    pdb_block = ''.join(fixed_lines)

    if missing_element:
        log.warning("Warning: input PDB has no information about atom "
                    + "elements and they were inferred from atom names. "
                    + "Please, verify that the resulting elements are "
                    + "correct")

    if any_fail:
        log.error("Error: PDB could not be fixed")
        # Fall back to the original content of the file
        with open(path) as pdb_file:
            pdb_block = pdb_file.read()

    return pdb_block
def get_parents(self, parent):
    """
    It sets the parent of each atom according to the molecular graph.

    The graph is explored depth-first starting from the supplied
    node, following the order in which self.neighbors() yields the
    neighbors of each node. The parent of a node is the node from
    which it was reached for the first time. The exploration is
    iterative, so it is not bounded by Python's recursion limit.

    An error is logged (no exception is raised) when the absolute
    parent is not the only node of the graph without a parent, which
    also covers nodes that cannot be reached from it.

    Parameters
    ----------
    parent : int
        The index of the node to use as the absolute parent

    Returns
    -------
    parents : dict[int, int]
        A dictionary containing the index of the parent of each
        atom according to the molecular graph, keyed by the index
        of each child. Nodes that are not reachable from the absolute
        parent are not included
    """
    # Initialize the parents dictionary
    parents = {parent: None}
    already_visited = {parent}

    # Each stack entry holds a node and the iterator over the
    # neighbors of that node that still need to be checked
    pending = [(parent, iter(self.neighbors(parent)))]

    while pending:
        current, childs = pending[-1]

        for child in childs:
            if child in already_visited:
                continue

            parents[child] = current
            already_visited.add(child)

            # Go down to the childs of this child before moving on
            # to its siblings
            pending.append((child, iter(self.neighbors(child))))
            break
        else:
            # All the neighbors of the current node have been checked
            pending.pop()

    # Assert absolut parent is the only with a None parent value.
    # Nodes that were never reached count as nodes without parent
    nodes_with_parent = sum(
        parents.get(node) is not None for node in self.nodes)

    if parents[parent] is not None or \
            nodes_with_parent != len(self.nodes) - 1:
        from peleffy.utils import Logger
        logger = Logger()
        logger.error('Error: found descendant without parent')

    return parents
def run_peleffy(pdb_file,
                forcefield_name=DEFAULT_OFF_FORCEFIELD,
                resolution=DEFAULT_RESOLUTION,
                charge_method=DEFAULT_CHARGE_METHOD,
                charges_from_file=None,
                chain=None,
                exclude_terminal_rotamers=True,
                output=None,
                with_solvent=False,
                as_datalocal=False,
                conformation_path=None):
    """
    It runs peleffy.

    The input molecule is loaded and parameterized with the selected
    force field, and its Impact template is written. A rotamer library
    is also written unless a conformation path is supplied, in which
    case a conformation library is written instead. The solvent
    template is only written on request.

    Parameters
    ----------
    pdb_file : str
        The path to the pdb_file to parameterize with peleffy
    forcefield_name : str
        The name of an OpenForceField's forcefield
    resolution : float
        The resolution in degrees for the rotamer library. Default is 30
    charge_method : str
        The name of the method to use to compute partial charges. Default
        is 'am1bcc'
    charges_from_file : str
        The file containing the partial charges to assign to the
        molecule. When it is supplied, the charge method is replaced
        by 'dummy' and charges are parsed from this file. Default is
        None
    chain : str
        Chain to the molecule if the PDB contains multiple molecules.
    exclude_terminal_rotamers : bool
        Whether to exclude terminal rotamers or not
    output : str
        Path where output files will be saved. The current working
        directory is used when it is not supplied
    with_solvent : bool
        Whether to generate and save the solvent parameters for the input
        molecule or not
    as_datalocal : bool
        Whether to save output files following PELE's DataLocal hierarchy or
        not
    conformation_path: str
        Path to the BCE server outupt to use to extract dihedral angles
    """
    if charges_from_file is not None:
        charge_method_str = 'file\n' \
            + ' - Charge file: {}'.format(charges_from_file)
        # Charges will be taken from the file later on
        charge_method = 'dummy'
    else:
        charge_method_str = charge_method

    log = Logger()
    log.info('-' * 60)
    log.info('Open Force Field parameterizer for PELE',
             peleffy.__version__)
    log.info('-' * 60)
    log.info(' - General:')
    log.info(' - Input PDB:', pdb_file)
    log.info(' - Output path:', output)
    log.info(' - Write solvent parameters:', with_solvent)
    log.info(' - DataLocal-like output:', as_datalocal)
    log.info(' - Parameterization:')
    log.info(' - Force field:', forcefield_name)
    log.info(' - Charge method:', charge_method_str)
    log.info(' - Rotamer library:')
    log.info(' - Resolution:', resolution)
    log.info(' - Exclude terminal rotamers:', exclude_terminal_rotamers)
    log.info('-' * 60)

    from peleffy.topology import Molecule, BCEConformations
    from peleffy.template import Impact
    from peleffy.solvent import OBC2
    from peleffy.forcefield import ForceFieldSelector
    from peleffy.topology import Topology
    from peleffy.utils import parse_charges_from_mae
    from peleffy.utils.input import PDBFile

    if not output:
        output = os.getcwd()

    # Initialize molecule
    if chain is not None:
        PDBreader = PDBFile(pdb_file)
        molecule = PDBreader.get_molecules_from_chain(
            selected_chain=chain,
            rotamer_resolution=resolution,
            exclude_terminal_rotamers=exclude_terminal_rotamers)
    else:
        molecule = Molecule(
            pdb_file, rotamer_resolution=resolution,
            exclude_terminal_rotamers=exclude_terminal_rotamers)

    # Initialize force field
    ff_selector = ForceFieldSelector()
    forcefield = ff_selector.get_by_name(forcefield_name)

    output_handler = OutputPathHandler(molecule, forcefield,
                                       output_path=output,
                                       as_datalocal=as_datalocal)

    # if conformation_path is set, we don't want a rotamer library
    if conformation_path is None:
        rotamer_library = peleffy.topology.RotamerLibrary(molecule)
        rotamer_library.to_file(output_handler.get_rotamer_library_path())

    # Parameterize molecule with the selected force field
    log.info(' - Parameterizing molecule')
    parameters = forcefield.parameterize(molecule,
                                         charge_method=charge_method)

    # Update charge parameters from the MAE file
    if charges_from_file is not None:
        parameters = parse_charges_from_mae(charges_from_file, parameters)

    # Generate the molecular topology
    topology = Topology(molecule, parameters)
    log.info(' - Parameters were built successfully:')
    log.info(' - {} atoms'.format(len(topology.atoms)))
    log.info(' - {} bonds'.format(len(topology.bonds)))
    # This count comes from topology.angles, so it is reported as
    # angles (it used to be mislabelled as torsions)
    log.info(' - {} angles'.format(len(topology.angles)))
    log.info(' - {} propers'.format(len(topology.propers)))
    log.info(' - {} impropers'.format(len(topology.impropers)))

    # Generate the impact template
    impact = Impact(topology)
    impact.to_file(output_handler.get_impact_template_path())

    # Generate the solvent template
    if with_solvent:
        solvent = OBC2(topology)
        solvent.to_file(output_handler.get_solvent_template_path())

    # Generate the conformation library
    if conformation_path is not None:
        conformations = BCEConformations(topology, conformation_path)
        conformations.calculate()
        conformations.save(output_handler.get_conformation_library_path())

    log.info(' - All files were generated successfully:')
    if conformation_path is None:
        log.info(' - {}'.format(output_handler.get_rotamer_library_path()))
    log.info(' - {}'.format(output_handler.get_impact_template_path()))
    if conformation_path is not None:
        log.info(' - {}'.format(
            output_handler.get_conformation_library_path()))
    if with_solvent:
        log.info(' - {}'.format(output_handler.get_solvent_template_path()))

    log.info('-' * 60)
def from_openff(openff_molecule, rotamer_resolution=30,
                exclude_terminal_rotamers=True, name='',
                tag='UNK', connectivity_template=None,
                core_constraints=[],
                allow_undefined_stereo=False,
                hydrogens_are_explicit=True):
    """
    It initializes and returns a peleffy Molecule representation
    from an OpenForceField molecular representation.

    The RDKit representation of the resulting molecule is generated
    from the supplied OpenForceField molecule and its rotamers are
    built before it is returned.

    Parameters
    ----------
    openff_molecule : an openforcefield.topology.Molecule object
        The OpenForceField's Molecule to use to initialize a peleffy
        Molecule object
    rotamer_resolution : float
        The resolution in degrees to discretize the rotamer's
        conformational space. Default is 30
    exclude_terminal_rotamers : bool
        Whether to exclude terminal rotamers when generating the
        rotamers library  or not
    name : str
        The molecule name. When it is empty, the name of the
        OpenForceField molecule is used
    tag : str
        The molecule tag. It must be a 3-character string
    connectivity_template : an rdkit.Chem.rdchem.Mol object
        A molecule represented with RDKit to use when assigning the
        connectivity of this Molecule object
    core_constraints : list[int or str]
        It defines the list of atoms to constrain in the core, thus,
        the core will be forced to contain them. Atoms can be specified
        through integers that match the atom index or strings that
        match with the atom PDB name
    allow_undefined_stereo : bool
        Whether to allow a molecule with undefined stereochemistry
        to be defined or try to assign the stereochemistry and
        raise a complaint if not possible. Default is False
    hydrogens_are_explicit : bool
        Whether the input molecule has explicit information about
        hydrogen atoms or not. Otherwise, they will be added when
        the molecule is built. Default is True

    Returns
    -------
    molecule : an peleffy.topology.Molecule
        The resulting peleffy's Molecule object

    Examples
    --------

    Load a molecule from an OpenForceField molecular representation,
    where openff_molecule is an already built OpenForceField molecule

    >>> from peleffy.topology import Molecule

    >>> molecule = Molecule.from_openff(openff_molecule)

    """
    if name == '':
        name = openff_molecule.name

    # The requested rotamer resolution must be forwarded. It used to
    # be ignored in favour of a hard-coded value of 30
    molecule = Molecule(
        rotamer_resolution=rotamer_resolution,
        exclude_terminal_rotamers=exclude_terminal_rotamers,
        name=name, tag=tag,
        connectivity_template=connectivity_template,
        core_constraints=core_constraints,
        allow_undefined_stereo=allow_undefined_stereo,
        hydrogens_are_explicit=hydrogens_are_explicit)

    logger = Logger()

    logger.info(' - Initializing molecule from an OpenFF '
                + 'molecular representation')
    molecule._initialize()
    molecule._off_molecule = openff_molecule

    logger.info(' - Generating RDKit molecular representation with '
                + 'the Open Force Field Toolkit')
    molecule._rdkit_molecule = openff_molecule.to_rdkit()

    molecule._build_rotamers()

    return molecule
def _extract_molecules_from_chain(self, chain, rotamer_resolution,
                                  exclude_terminal_rotamers,
                                  allow_undefined_stereo,
                                  core_constraints):
    """
    It extracts all hetero molecules found in the selected chain of a
    PDB file.

    One molecule is built for each residue id found among the HETATM
    records of the chain, ignoring the records whose residue name is
    HOH. A molecule that cannot be built is skipped and the exception
    that was raised is reported through the logger as a warning.

    Parameters
    ----------
    chain : str
        Chain ID.
    rotamer_resolution : float
        The resolution in degrees to discretize the rotamer's
        conformational space. Default is 30
    exclude_terminal_rotamers : bool
        Whether to exclude terminal rotamers when generating the
        rotamers library  or not
    allow_undefined_stereo : bool
        Whether to allow a molecule with undefined stereochemistry
        to be defined or try to assign the stereochemistry and
        raise a complaint if not possible. Default is False
    core_constraints : list[int or str]
        It defines the list of atoms to constrain in the core, thus,
        the core will be forced to contain them. Atoms can be specified
        through integers that match the atom index or strings that
        match with the atom PDB name

    Returns
    -------
    molecules : list[peleffy.topology.Molecule object]
        Selected molecules
    """
    from peleffy.topology.molecule import Molecule

    def belongs_to_chain(record):
        """It tells whether a line is a HETATM record of the chain."""
        return record.startswith('HETATM') and record[21:22] == chain

    # Check if there is more than one hetero molecule in the same chain
    residues_ids = {
        record[22:26].strip() for record in self.pdb_content
        if belongs_to_chain(record) and record[17:20].strip() != 'HOH'}

    molecules = []
    for residue_id in residues_ids:
        # HETATM records of the chain that belong to this residue
        residue_records = [
            record for record in self.pdb_content
            if belongs_to_chain(record)
            and record[22:26].strip() == residue_id]

        res_name = {record[17:20].strip() for record in residue_records}

        # Select which atoms compose this hetero molecule
        atom_ids = [record[6:11].strip() for record in residue_records]

        # Extract the PDB block of the molecule: HETATM and CONECT
        # records that mention any of its atom ids between blank spaces
        pdb_block = [
            record for record in self.pdb_content
            if record.startswith(('HETATM', 'CONECT'))
            and any(' {} '.format(atom_id) in record
                    for atom_id in atom_ids)]

        try:
            molecule = Molecule(
                pdb_block=''.join(pdb_block),
                rotamer_resolution=rotamer_resolution,
                exclude_terminal_rotamers=exclude_terminal_rotamers,
                allow_undefined_stereo=allow_undefined_stereo,
                core_constraints=core_constraints)
        except Exception as e:
            log = Logger()
            log.warning(' - Skipping {} '.format(next(iter(res_name)))
                        + 'from chain {}'.format(chain))
            log.warning(' - The following exception was raised: '
                        + '{}'.format(e))
        else:
            molecules.append(molecule)

    return molecules
def _initialize_from_pdb(self, path):
    """
    It initializes a molecule with the molecule structure read from
    a PDB file.

    The PDB file is validated and fixed, if required, before it is
    loaded with RDKit. An unnamed molecule is named after the PDB file
    and a molecule with the 'UNK' tag takes the residue name supplied
    by the RDKit toolkit wrapper as its tag.

    Parameters
    ----------
    path : str
        The path to a PDB with the molecule structure
    """
    log = Logger()
    log.info(' - Initializing molecule from PDB')
    self._initialize()

    # Validate PDB
    self._pdb_checkup(path)

    # Read and fix PDB
    fixed_block = self._read_and_fix_pdb(path)

    log.info(' - Loading molecule from RDKit')
    rdkit_wrapper = RDKitToolkitWrapper()
    self._rdkit_molecule = rdkit_wrapper.from_pdb_block(
        fixed_block, self.hydrogens_are_explicit)

    # Use RDKit template, if any, to assign the connectivity to
    # the current Molecule object
    if self.connectivity_template is not None:
        log.info(' - Assigning connectivity from template')
        rdkit_wrapper.assign_connectivity_from_template(self)

    if not self.allow_undefined_stereo:
        # RDKit must generate stereochemistry specifically from 3D coords
        log.info(' - Assigning stereochemistry from 3D coordinates')
        rdkit_wrapper.assign_stereochemistry_from_3D(self)

    # Set molecule name according to PDB name
    if self.name == '':
        from pathlib import Path
        pdb_name = Path(path).stem
        log.info(" - Setting molecule name to '{}'".format(pdb_name))
        self.set_name(pdb_name)

    # Set molecule tag according to PDB's residue name
    if self.tag == 'UNK':
        residue_name = rdkit_wrapper.get_residue_name(self)
        log.info(" - Setting molecule tag to '{}'".format(residue_name))
        self.set_tag(residue_name)

    log.info(' - Representing molecule with the Open Force Field '
             + 'Toolkit')
    off_wrapper = OpenForceFieldToolkitWrapper()
    self._off_molecule = off_wrapper.from_rdkit(
        self, self.hydrogens_are_explicit)
def _initialize_from_pdb_block(self, pdb_block):
    """
    It initializes a molecule with the molecule structure fetch
    in a PDB block.

    The PDB block is loaded with RDKit as it is supplied. A molecule
    with the 'UNK' tag takes the residue name supplied by the RDKit
    toolkit wrapper as its tag. The molecule name is not modified here.

    Parameters
    ----------
    pdb_block : str
        PDB block with the molecule structure
    """
    log = Logger()
    log.info(' - Initializing molecule from PDB')
    self._initialize()

    log.info(' - Loading molecule from RDKit')
    rdkit_wrapper = RDKitToolkitWrapper()
    self._rdkit_molecule = rdkit_wrapper.from_pdb_block(
        pdb_block, self.hydrogens_are_explicit)

    # Use RDKit template, if any, to assign the connectivity to
    # the current Molecule object
    if self.connectivity_template is not None:
        log.info(' - Assigning connectivity from template')
        rdkit_wrapper.assign_connectivity_from_template(self)

    if not self.allow_undefined_stereo:
        # RDKit must generate stereochemistry specifically from 3D coords
        log.info(' - Assigning stereochemistry from 3D coordinates')
        rdkit_wrapper.assign_stereochemistry_from_3D(self)

    # Set molecule tag according to PDB's residue name
    if self.tag == 'UNK':
        residue_name = rdkit_wrapper.get_residue_name(self)
        log.info(" - Setting molecule tag to '{}'".format(residue_name))
        self.set_tag(residue_name)

    log.info(' - Representing molecule with the Open Force Field '
             + 'Toolkit')
    off_wrapper = OpenForceFieldToolkitWrapper()
    self._off_molecule = off_wrapper.from_rdkit(
        self, self.hydrogens_are_explicit)