def __init__(self, path='', verbosity=0):
    """ Constructs a UserStorageDirectory object, creating the storage directory when needed

    The base directory is ``path`` when supplied. Otherwise $XDG_CONFIG_HOME, $HOME/.config and ./.config are tried,
    in this order. The storage directory is always the ``fep_automate`` subdirectory of the selected base and is
    stored in ``self.path``.

    :param str path: use this dir as base, default: $XDG_CONFIG_HOME
    :param int verbosity: verbosity level
    :raises FileExistsError: if the storage path already exists and is not a directory
    """

    if path:
        base_dir = path
    else:
        # An empty XDG_CONFIG_HOME is handled as if it was unset, instead of silently selecting the current directory
        base_dir = os.environ.get('XDG_CONFIG_HOME')
        if not base_dir:
            try:
                base_dir = os.path.join(os.environ['HOME'], '.config')
            except KeyError:
                # Is this unix?
                base_dir = os.path.join(os.curdir, '.config')
                os_util.local_print('You seem to be running on a non-UNIX system (or there are issues in your '
                                    'environment). Trying to go on, but you may experience errors.',
                                    msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)

    self.path = os.path.join(base_dir, 'fep_automate')

    # makedirs also creates missing parents (eg: a not yet existing $HOME/.config), where os.mkdir would fail with
    # FileNotFoundError. An already existing directory is not an error.
    os.makedirs(self.path, exist_ok=True)
def adjust_query_properties(query_molecule, generic_atoms=False, ignore_charge=True, ignore_isotope=True, verbosity=0):
    """ Returns a relaxed copy of a query molecule, to be used in substructure matching

    Bonds are always made generic, dummies are converted to queries and degree adjustment is disabled. Atoms are
    either made fully generic (generic_atoms=True) or, otherwise, optionally stripped of isotope and charge data.
    The input molecule is not modified.

    :param rdkit.Chem.Mol query_molecule: query molecule
    :param bool generic_atoms: make atoms generic
    :param bool ignore_charge: set all atomic charges to 0 (not used if generic_atoms)
    :param bool ignore_isotope: ignore atomic isotopes (not used if generic_atoms)
    :param int verbosity: controls the verbosity level
    :rtype: rdkit.Chem.Mol
    """

    entering_message = ('Entering adjust_query_properties(query_molecule={} (SMILES={}), generic_atoms={}, '
                        'verbosity={})')
    os_util.local_print(entering_message.format(query_molecule, rdkit.Chem.MolToSmiles(query_molecule),
                                                generic_atoms, verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # All changes are applied to a copy
    adjusted_mol = rdkit.Chem.Mol(query_molecule)

    # Parameters to GetSubstructMatch
    adjust_params = rdkit.Chem.rdmolops.AdjustQueryParameters()
    adjust_params.makeBondsGeneric = True
    adjust_params.makeDummiesQueries = True
    adjust_params.adjustDegree = False

    if generic_atoms:
        adjust_params.makeAtomsGeneric = True
    else:
        if ignore_isotope:
            # Replace the query of each query atom by one matching the atomic number only
            for each_atom in adjusted_mol.GetAtoms():
                if not isinstance(each_atom, rdkit.Chem.QueryAtom):
                    continue
                element_smarts = '[#{}]'.format(each_atom.GetAtomicNum())
                each_atom.SetQuery(rdkit.Chem.MolFromSmarts(element_smarts).GetAtomWithIdx(0))
        if ignore_charge:
            for each_atom in adjusted_mol.GetAtoms():
                each_atom.SetFormalCharge(0)

    adjusted_mol = rdkit.Chem.AdjustQueryProperties(adjusted_mol, adjust_params)

    altered_message = 'The molecule {} (SMARTS={}) was altered by adjust_query_properties to {} (SMARTS={})'
    os_util.local_print(altered_message.format(query_molecule, rdkit.Chem.MolToSmiles(query_molecule),
                                               adjusted_mol, rdkit.Chem.MolToSmiles(adjusted_mol)),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    return adjusted_mol
def process_custom_mcs(custom_mcs, savestate=None, verbosity=0):
    """ Parses user supplied custom MCS data

    A SMARTS string is applied to all molecules and returned as {'*': smarts}. A dict must be keyed either by
    frozensets of two elements or by 'name1-name2' strings, which are converted to frozensets. Falsy input results in
    an empty dict. When a savestate is supplied, the result is stored in it and the savestate is saved.

    :param [str, dict] custom_mcs: mcs data to be parsed
    :param savestate_util.SavableState savestate: saved state data
    :param int verbosity: controls verbosity level
    :raises SystemExit: if custom_mcs cannot be parsed
    :rtype: dict
    """

    parsed_mcs = {}

    if custom_mcs:
        custom_mcs = os_util.detect_type(custom_mcs, test_for_dict=True)

        if isinstance(custom_mcs, str):
            if rdkit.Chem.MolFromSmarts(custom_mcs) is None:
                os_util.local_print('Could not parse you custom MCS "{}".'.format(custom_mcs),
                                    msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
                raise SystemExit(1)

            os_util.local_print('Using user-supplied MCS {} for all molecules.'.format(custom_mcs),
                                msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)
            parsed_mcs = {'*': custom_mcs}

        elif isinstance(custom_mcs, dict):
            pair_keys = all(isinstance(each_key, frozenset) and len(each_key) == 2 for each_key in custom_mcs)
            if pair_keys:
                # Already in the final format
                parsed_mcs = custom_mcs
            elif all(isinstance(each_key, str) and each_key.count('-') == 1 for each_key in custom_mcs):
                # Convert 'name1-name2' keys to unordered pairs
                parsed_mcs = {}
                for each_key, each_value in custom_mcs.items():
                    parsed_mcs[frozenset(each_key.split('-'))] = each_value
            else:
                os_util.local_print('Could not parse you custom MCS "{}". If providing a dict, make sure to follow '
                                    'the required format (see documentation).'.format(custom_mcs),
                                    msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
                raise SystemExit(1)

        else:
            os_util.local_print('Could not parse you custom MCS. A string or dict is required, but your data "{}" '
                                'was parsed as a {} (see documentation for formatting options).'
                                ''.format(custom_mcs, type(custom_mcs)),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

    if savestate is not None:
        savestate['custom_mcs'] = parsed_mcs
        known_mcs = savestate.setdefault('mcs_dict', {})
        known_mcs.update(parsed_mcs)
        savestate.save_data()

    return parsed_mcs
def generic_mol_read(ligand_format, ligand_data, verbosity=0):
    """ Reads a ligand, selecting the reader from the supplied format

    For mol2 and mol formats, ligand_data is first parsed as the file contents and, should that fail, as a file
    name. pdb and pdbqt are read using openbabel, then converted to a rdkit molecule. A rdkit.Chem.Mol passed as
    ligand_data is returned as is.

    :param str ligand_format: data format or extension
    :param [str, rdkit.Chem.Mol] ligand_data: data to be read
    :param int verbosity: set verbosity
    :raises SystemExit: if ligand_format is not one of the known formats
    :rtype: rdkit.Chem.Mol
    """

    if isinstance(ligand_data, rdkit.Chem.Mol):
        return ligand_data

    if ligand_format in ('mol2', '.mol2'):
        read_mol = rdkit.Chem.MolFromMol2Block(ligand_data, removeHs=False)
        if read_mol is None:
            read_mol = rdkit.Chem.MolFromMol2File(ligand_data, removeHs=False)
        return read_mol

    if ligand_format in ('mol', '.mol'):
        read_mol = rdkit.Chem.MolFromMolBlock(ligand_data, removeHs=False)
        if read_mol is None:
            read_mol = rdkit.Chem.MolFromMolFile(ligand_data, removeHs=False)
        return read_mol

    if ligand_format in ('pdbqt', '.pdbqt', 'pdb', '.pdb'):
        os_util.local_print('You are reading a pdb or pdbqt file ({}), which requires openbabel. Should this fail, you '
                            'may try converting it to a mol2 before hand. This may be unsafe.'.format(ligand_data),
                            msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
        import pybel

        # Data first, file name as a fallback
        try:
            ob_molecule = pybel.readstring('pdb', ligand_data)
        except OSError:
            ob_molecule = next(pybel.readfile('pdb', ligand_data))
        return mol_util.obmol_to_rwmol(ob_molecule)

    os_util.local_print('Failed to read pose data from {} with type {}'.format(ligand_data, ligand_format),
                        msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
    raise SystemExit(1)
def get_position_matrix(each_mol, each_mol_str=None, atom_selection=None, verbosity=0):
    """ Collects the position vectors of selected atoms from the residues of a molecule

    Residues are paired, in order, with the entries of each_mol_str and only atoms of residues paired with a truthy
    entry are returned. Atom names are compared to atom_selection after stripping surrounding whitespace.

    :param pybel.Molecule each_mol: molecule to get positions from
    :param list each_mol_str: a alignment string from where residues to be used will be read (default: use all
                              residues)
    :param list atom_selection: use atoms matching this name (default: CA)
    :param int verbosity: sets the verbosity level
    :raises SystemExit: if the same atom (name, residue name and residue index) is selected twice
    :rtype: list
    """

    if atom_selection is None:
        atom_selection = ['CA']

    if each_mol_str is None:
        each_mol_str = [True] * len(each_mol.residues)

    # A set makes the duplicate test O(1); proteins can have thousands of selected atoms
    added_atoms = set()
    return_list = []

    for each_residue, residue_alignment in zip(each_mol.residues, each_mol_str):
        if not residue_alignment:
            # No atom from a residue absent in the alignment is used
            continue

        for each_atom in each_residue.atoms:
            atom_id = each_atom.OBAtom.GetResidue().GetAtomID(each_atom.OBAtom)
            if atom_id.strip() not in atom_selection:
                continue

            atom_str = '{}{}{}'.format(atom_id, each_residue.name, each_residue.idx)
            if atom_str in added_atoms:
                os_util.local_print('Atom {} found twice in your protein {}. Cannot handle multiple '
                                    'occupancies.'.format(atom_str, each_mol.title),
                                    msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
                raise SystemExit(1)

            added_atoms.add(atom_str)
            return_list.append(each_atom.OBAtom.GetVector())

    return return_list
def save_data(self, output_file='', verbosity=0):
    """ Save state to a pickle file

    The instance __dict__ is pickled to a temporary file, which is then moved over self.data_file, so that a failed
    dump does not truncate a previously saved file.

    :param str output_file: save result to this file; if supplied, also replaces self.data_file
    :param int verbosity: controls verbosity level
    :raises SystemExit: if the temporary file cannot be written
    :raises FileNotFoundError: if the temporary file cannot be moved to self.data_file
    :rtype: bool
    """

    if output_file != '':
        self.data_file = output_file

    # NOTE(review): the temporary file lives in the current directory. Moving it to a self.data_file on another
    # filesystem, or running two instances from the same directory, may fail - confirm whether this matters.
    temp_file = '.temp_pickle_test.pkl'

    try:
        with open(temp_file, 'wb') as file_handler:
            pickle.dump(self.__dict__, file_handler)
            # fsync only acts on data already handed to the OS, so Python's own buffer must be flushed first
            file_handler.flush()
            os.fsync(file_handler.fileno())
    except OSError:
        os_util.local_print('Could not save data to {}'.format(self.data_file),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise SystemExit(1)

    try:
        os.replace(temp_file, self.data_file)
    except FileNotFoundError:
        os_util.local_print('Failed to save progress data to file {}'.format(self.data_file),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        # Re-raise the original exception, keeping its errno, filename and traceback
        raise

    os_util.local_print('Saved data to {}'.format(self.data_file),
                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)
    return True
def extract_docking_receptor(receptor_file, verbosity=0):
    """ Reads the first molecule of a docking receptor file using openbabel

    The format is guessed from the file extension; pdbqt files are read as pdb.

    :param str receptor_file: receptor file
    :param int verbosity: be verbosity
    :raises SystemExit: if the format is not understood or the file cannot be read
    :rtype: pybel.Molecule
    """

    import pybel

    # Silence openbabel warnings, unless running at a high verbosity
    if verbosity <= 3:
        pybel.ob.obErrorLog.SetOutputLevel(pybel.ob.obError)

    file_extension = os.path.splitext(receptor_file)[1]
    receptor_format = file_extension.lstrip('.')
    if receptor_format == 'pdbqt':
        receptor_format = 'pdb'

    os_util.local_print('Reading receptor data from {} as a {} file'.format(receptor_file, receptor_format),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    try:
        receptor_mol_local = next(pybel.readfile(receptor_format, receptor_file))
    except ValueError as error_data:
        os_util.local_print('Could not understand format {} (guessed from extension). Error was: {}'
                            ''.format(receptor_format, error_data),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise SystemExit(1)
    except (IOError, StopIteration) as error_data:
        os_util.local_print('Could not read file {} using format {} (guessed from extension). Error was: {}'
                            ''.format(receptor_file, receptor_format, error_data),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise SystemExit(1)

    return receptor_mol_local
def store_file(self, source, dest_file='', verbosity=0):
    """ Copy file or dir to storage dir

    Two copies are made inside self.path: one named dest_file, replacing a previously stored directory of the same
    name, and a backup named '<dest_file>_<HHMMSS_DDMMYYYY><extension of dest_file>'.

    :param str source: file or directory to be copied
    :param str dest_file: new file name, default: use source_file name
    :param int verbosity: verbosity level
    :raises ValueError: if dest_file was not supplied and no name can be obtained from source
    :rtype: bool
    """

    if not dest_file:
        # normpath removes trailing separators, so that 'some/dir/' gives 'dir' rather than an empty name
        dest_file = os.path.basename(os.path.normpath(source))
        if dest_file in ['', '.']:
            os_util.local_print('Could not get a name from {} and dest_file was not supplied. Cannot go continue.'
                                ''.format(source),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise ValueError('invalid source name')

    # Only the extension ([1]) goes in the backup name; formatting the whole splitext tuple would write its repr
    backup_name = os.path.join(self.path, '{}_{}{}'.format(os.path.basename(dest_file), strftime('%H%M%S_%d%m%Y'),
                                                           os.path.splitext(dest_file)[1]))
    stored_name = os.path.join(self.path, dest_file)

    # Explicitly test for a directory: the exception raised by copy2 on a directory is platform dependent
    if os.path.isdir(source):
        try:
            copytree(source, stored_name)
        except FileExistsError:
            # Replace a previously stored copy
            rmtree(stored_name)
            copytree(source, stored_name)
        # backup_name already includes self.path
        copytree(source, backup_name)
    else:
        copy2(source, stored_name)
        copy2(source, backup_name)

    return True
def __init__(self, input_file='', verbosity=0):
    """ Init SavableState by reading a pickle file, or doing nothing. If no input_file is given, a default name will
    be generated

    When input_file is supplied, every item read from it is set as an attribute of the new object and data_file is
    updated to input_file. Otherwise, only data_file is set, to a time-stamped name.

    :param str input_file: read saved data from this file, which is also used to save data
    :param int verbosity: control verbosity level
    :raises ValueError: if the data read from input_file has no data_file entry
    """

    super().__init__()

    if input_file:
        # NOTE(review): presumably _read_data unpickles input_file, so only trusted progress files should be
        # loaded - confirm against _read_data
        saved_data = self._read_data(input_file)
        for key, value in saved_data.items():
            setattr(self, key, value)

        try:
            recorded_name = self.data_file
        except AttributeError:
            # self.data_file cannot be used in this message: reading it again would raise AttributeError from here
            os_util.local_print('Progress file {} does not contain data_file data. Is it a progress file?'
                                ''.format(input_file),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise ValueError('Invalid progress file')

        if recorded_name != input_file:
            if verbosity >= 0:
                # User moved/renamed progress file or something weird happened
                os_util.local_print('Progress file {} claims to be generated as file {}'
                                    ''.format(input_file, recorded_name),
                                    current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
            # Regardless of verbosity, further saves must go to the file actually read
            self.data_file = input_file
    else:
        # User did not supplied a name, generate one
        from time import strftime
        self.data_file = 'savedata_{}.pkl'.format(strftime('%d%m%Y_%H%M%S'))
def align_sequences_match_residues(mobile_seq, target_seq, seq_align_mat='BLOSUM80', gap_penalty=-1.0, verbosity=0):
    """ Align two aminoacid sequences using Bio.pairwise2.align.globalds and the substitution matrix seq_align_mat

    Only the first alignment found is used. For each sequence, a list with one bool per residue is generated, being
    True when the residue is aligned to a residue (rather than to a gap) of the other sequence. These lists select
    the residues to be used in the 3D alignment.

    :param str mobile_seq: sequence of mobile protein
    :param str target_seq: sequence of target protein
    :param str seq_align_mat: name of a substitution matrix, loaded by Bio.Align.substitution_matrices.load
    :param float gap_penalty: gap penalty to the alignment (used both to open and to extend gaps); avoid values too
                              low in module
    :param int verbosity: sets the verbosity level
    :raises ImportError: if Biopython cannot be imported
    :raises FileNotFoundError: if the substitution matrix cannot be loaded
    :rtype: tuple
    :return: (mobile, reference) lists of bool
    """

    try:
        from Bio.pairwise2 import align
        from Bio.Align import substitution_matrices
        seq_align_mat = substitution_matrices.load(seq_align_mat)
    except ImportError as error:
        os_util.local_print('Failed to import Biopython with error: {}\nBiopython is necessary to sequence'
                            'alignment. Sequences to be aligned:\nReference: {}\nMobile: {}'
                            ''.format(error, target_seq, mobile_seq),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise ImportError(error)
    except FileNotFoundError as error:
        # Called without arguments, load() returns the available matrices
        available_matrices = substitution_matrices.load()
        os_util.local_print('Failed to import substitution matrix {} with error: {}\nSubstitution matrix must be one '
                            'of: {})'.format(seq_align_mat, error, available_matrices),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise FileNotFoundError(error)

    # Same penalty to open and extend gaps; the target sequence comes first in the alignment
    all_alignments = align.globalds(target_seq, mobile_seq, seq_align_mat, gap_penalty, gap_penalty)
    align_result = all_alignments[0]
    os_util.local_print('This is the alignment result to be used in protein alignment:\n{}'.format(align_result),
                        msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

    aligned_target = align_result[0]
    aligned_mobile = align_result[1]

    # One entry per actual residue (gaps in the own sequence are skipped)
    ref_align_str = [mobile_res != '-'
                     for target_res, mobile_res in zip(aligned_target, aligned_mobile) if target_res != '-']
    mob_align_str = [target_res != '-'
                     for target_res, mobile_res in zip(aligned_target, aligned_mobile) if mobile_res != '-']

    return mob_align_str, ref_align_str
def read_reference_structure(reference_structure, verbosity=0):
    """ Reads the first molecule of a structure file using openbabel, recording where it came from

    The format is guessed from the file extension; pdbqt files are read as pdb. The file name is stored in the
    'file_path' data field of the molecule. Should reference_structure already be a pybel.Molecule, it is returned
    after setting 'file_path' to False, unless that field is already present.

    :param [str, pybel.Molecule] reference_structure: receptor file
    :param int verbosity: be verbosity
    :raises SystemExit: if the file cannot be read
    :rtype: pybel.Molecule
    """

    import pybel

    os_util.local_print('Entering extract read_reference_structure(reference_structure={}, verbosity={})'
                        ''.format(reference_structure, verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    if isinstance(reference_structure, pybel.Molecule):
        # Flag that we cannot know the file path, if it's not already present. OpenBabel MoleculeData mimics a dict,
        # but lacks a setdefault method, so we're doing this the dumb way
        if 'file_path' not in reference_structure.data:
            reference_structure.data['file_path'] = False
        return reference_structure

    file_extension = splitext(reference_structure)[1]
    receptor_format = file_extension.lstrip('.')
    if receptor_format == 'pdbqt':
        receptor_format = 'pdb'

    os_util.local_print('Reading receptor data from {} as a {} file'.format(reference_structure, receptor_format),
                        msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

    try:
        receptor_mol_local = next(pybel.readfile(receptor_format, reference_structure))
    except (ValueError, StopIteration, IOError) as error_data:
        os_util.local_print('Could not read file {}. Format {} was guessed from extension). Error message was "{}"'
                            ''.format(reference_structure, receptor_format, error_data),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise SystemExit(1)

    receptor_mol_local.data['file_path'] = reference_structure
    return receptor_mol_local
def loose_replace_side_chains(mol, core_query, use_chirality=False, verbosity=True):
    """ Reconstruct a molecule based on common core, relaxing the core query until it works

    rdkit.Chem.ReplaceSidechains is tried up to three times: using the core as supplied (with explicit hydrogens
    removed if mol has none), then using the core modified by adjust_query_properties and, last, using
    adjust_query_properties with generic_atoms=True. The first result which is not None is returned.

    :param rdkit.Chem.Mol mol: the molecule to be modified
    :param rdkit.Chem.Mol core_query: the molecule to be used as a substructure query for recognizing the core
    :param bool use_chirality: match the substructure query using chirality
    :param int verbosity: set verbosity level
    :rtype: rdkit.Chem.Mol
    :return: the result of the last rdkit.Chem.ReplaceSidechains call, which is None if every attempt failed
    """

    core_structure = rdkit.Chem.Mol(core_query)

    if num_explict_hydrogens(core_query) > 0 and num_explict_hydrogens(mol) == 0:
        os_util.local_print('loose_replace_side_chains was called with a mol without explict hydrogens and a '
                            'core_query with {} explict hydrogens. Removing core_query explict Hs.'
                            ''.format(num_explict_hydrogens(core_query)),
                            msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)
        core_editor = rdkit.Chem.EditableMol(core_query)
        hydrogen_indices = [atom.GetIdx() for atom in core_query.GetAtoms() if atom.GetAtomicNum() == 1]
        # Highest index first, so that the indices still to be removed remain valid
        hydrogen_indices.sort(reverse=True)
        for hydrogen_idx in hydrogen_indices:
            core_editor.RemoveAtom(hydrogen_idx)
        core_structure = core_editor.GetMol()
        rdkit.Chem.SanitizeMol(core_structure, catchErrors=True)

    # First attempt: core as is
    replaced_mol = rdkit.Chem.ReplaceSidechains(mol, core_structure, useChirality=use_chirality)
    if replaced_mol is not None:
        return replaced_mol

    os_util.local_print('rdkit.Chem.ReplaceSidechains failed with mol={} (SMILES="{}") and coreQuery={} '
                        '(SMARTS="{}"). Retrying with adjust_query_properties.'
                        ''.format(mol, rdkit.Chem.MolToSmiles(mol), core_structure,
                                  rdkit.Chem.MolToSmarts(core_structure)),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # Second attempt: relaxed core
    relaxed_core = adjust_query_properties(core_structure, verbosity=verbosity)
    replaced_mol = rdkit.Chem.ReplaceSidechains(mol, relaxed_core, useChirality=use_chirality)
    if replaced_mol is not None:
        return replaced_mol

    os_util.local_print('rdkit.Chem.ReplaceSidechains failed with mol={} (SMILES="{}") and coreQuery={} '
                        '(SMARTS="{}"). Retrying with adjust_query_properties setting generic_atoms=True.'
                        ''.format(mol, rdkit.Chem.MolToSmiles(mol), core_structure,
                                  rdkit.Chem.MolToSmarts(core_structure)),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # Last attempt: relaxed core with generic atoms
    generic_core = adjust_query_properties(core_structure, generic_atoms=True, verbosity=verbosity)
    return rdkit.Chem.ReplaceSidechains(mol, generic_core, useChirality=use_chirality)
def read_reference_structure(reference_structure, verbosity=0):
    """ Reads the first molecule of a structure file using openbabel

    The format is guessed from the file extension; pdbqt files are read as pdb. A pybel.Molecule passed as
    reference_structure is returned as is.

    :param [str, pybel.Molecule] reference_structure: receptor file
    :param int verbosity: be verbosity
    :raises SystemExit: if the file cannot be read
    :rtype: pybel.Molecule
    """

    import pybel

    os_util.local_print('Entering extract read_reference_structure(reference_structure={}, verbosity={})'
                        ''.format(reference_structure, verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # Nothing to read
    if isinstance(reference_structure, pybel.Molecule):
        return reference_structure

    guessed_format = splitext(reference_structure)[1].lstrip('.')
    receptor_format = 'pdb' if guessed_format == 'pdbqt' else guessed_format

    os_util.local_print('Reading receptor data from {} as a {} file'.format(reference_structure, receptor_format),
                        msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

    try:
        molecule_reader = pybel.readfile(receptor_format, reference_structure)
        return next(molecule_reader)
    except (ValueError, StopIteration, IOError) as error_data:
        os_util.local_print('Could not read file {}. Format {} was guessed from extension). Error message was "{}"'
                            ''.format(reference_structure, receptor_format, error_data),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise SystemExit(1)
def test_center_molecule(map_bias, all_molecules, verbosity=0):
    """ Validates the center molecule used to prepare star or wheel maps

    map_bias is parsed using os_util.detect_type and must result in a single molecule name, present in
    all_molecules.

    :param [list, str] map_bias: test this bias string or list
    :param list all_molecules: all molecules read com input
    :param int verbosity: sets the verbosity level
    :raises SystemExit: if none or more than one center molecule is supplied
    :raises ValueError: if the center molecule is not in all_molecules
    :rtype: str
    """

    map_bias = os_util.detect_type(map_bias, test_for_list=True)

    if not map_bias:
        os_util.local_print('A star map requires one, and only one, center molecule. You supplied none.',
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise SystemExit(1)

    if isinstance(map_bias, list):
        if len(map_bias) > 1:
            os_util.local_print('A star map requires one, and only one, center molecule. You supplied {} ({})'
                                ''.format(len(map_bias), map_bias),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)
        # A list holding a single element: use the element
        map_bias = map_bias[0]

    if map_bias not in all_molecules:
        os_util.local_print('The center molecule you supplied ({}) not found in {}.'
                            ''.format(map_bias, ', '.join(all_molecules)),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise ValueError('Molecule not found')

    return map_bias
def extract_autodock4_poses(ligands_dict, poses_data=None, no_checks=False, verbosity=0):
    """ Extracts one docked pose per ligand from Autodock4 output files and reads the poses

    For each ligand, the selected cluster (default: 1) is extracted, using awk, from the "LOWEST ENERGY DOCKED
    CONFORMATION from EACH CLUSTER" section of its file. The entry of the ligand in ligands_dict is replaced, in
    place, by an all_classes.Namespace holding the extracted data (format pdbqt), and ligands_dict is then passed to
    extract_docking_poses.

    :param dict ligands_dict: dictionary containing ligands data (name: Autodock4 output file)
    :param [str, dict] poses_data: file with poses to be used (lines "ligand_name = cluster_number"; lines starting
                                   with ; or # are ignored) or a dict {ligand_name: cluster_number}
    :param bool no_checks: ignore checks and tries to go on
    :param int verbosity: be verbosity
    :raises SystemExit: if poses_data cannot be parsed or, unless no_checks, if a pose cannot be extracted
    :rtype: dict
    """

    # Prints the {0}-th MODEL ... ENDMDL block found after the clusters section header. Braces are doubled because
    # the script is passed through str.format
    awk_extract_poses = """
    BEGIN {{
        Found = 0
        FoundM = 0
    }}

    $0 == "\tLOWEST ENERGY DOCKED CONFORMATION from EACH CLUSTER" {{
        Found = 1
    }}

    Found == 1 && $1 == "MODEL" {{
        FoundM+=1
        if (FoundM > {0}) {{
            exit
        }}
    }}

    FoundM == {0} {{
        print $0
        if ($0 == "ENDMDL") {{
            exit
        }}
    }}
    """

    # FIXME: fix this method
    if isinstance(poses_data, str):
        raw_data = os_util.read_file_to_buffer(poses_data, die_on_error=True, return_as_list=True,
                                               error_message='Failed to read poses data file.', verbosity=verbosity)
        docking_poses_data = {}
        for each_line in raw_data:
            # Skip empty lines and comments
            if (len(each_line) <= 1) or (each_line[0] in [';', '#']):
                continue
            lig_data = each_line.split('=')
            try:
                docking_poses_data[lig_data[0].rstrip()] = int(lig_data[1])
            except (ValueError, IndexError) as error_data:
                os_util.local_print('Could not read line "{}" from file {} with error {}'
                                    ''.format(each_line, poses_data, error_data),
                                    msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
                raise SystemExit(1)
    elif isinstance(poses_data, dict):
        docking_poses_data = poses_data
    else:
        docking_poses_data = {}

    os_util.local_print('{:=^50}\n{:<15} {:<15} {:<15}'.format(' Autodock4 poses ', 'Name', 'File', 'Cluster #'),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    for each_name, each_mol in ligands_dict.items():
        # Extract cluster data and reads it
        cluster_num = docking_poses_data.get(each_name, 1)
        try:
            docking_cluster_pdb = subprocess.check_output(['awk', awk_extract_poses.format(cluster_num), each_mol])
        except (subprocess.CalledProcessError, OSError) as error_data:
            # OSError: awk itself could not be executed (eg: it is not installed)
            os_util.local_print('Could not run external program. Error: {}'.format(error_data),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            if not no_checks:
                raise SystemExit(1)
            os_util.local_print('{:<15} {:<18} {:<15} ERROR!!!'.format(each_name, each_mol, cluster_num),
                                msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)
            continue

        mol_awk_result = docking_cluster_pdb.decode(sys.stdout.encoding)
        if len(mol_awk_result) < 3:
            # Nothing (useful) was printed by awk: the requested cluster is not in the file
            os_util.local_print('Failed to read cluster {} from file {}.'.format(cluster_num, each_mol),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            if not no_checks:
                raise SystemExit(1)
            os_util.local_print('{:<15} {:<18} {:<15} ERROR!!!'.format(each_name, each_mol, cluster_num),
                                msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)
            continue

        # Only values of existing keys are replaced, so the dict can be altered while being iterated
        original_file_path = ligands_dict[each_name]
        ligands_dict[each_name] = all_classes.Namespace()
        ligands_dict[each_name].format = 'pdbqt'
        ligands_dict[each_name].data = mol_awk_result
        ligands_dict[each_name].comment = '{} cluster {}'.format(original_file_path, cluster_num)

    # NOTE(review): no_checks is not forwarded to extract_docking_poses, and ligands skipped above are passed on as
    # their original file names - confirm whether this is intended
    return extract_docking_poses(ligands_dict, verbosity=verbosity)
def process_dummy_atoms(molecule, verbosity=0):
    """ Sanitizes dummy atoms in a rdkit.Chem.Mol

    Every query atom is replaced by a dummy atom (atomic number 0), keeping its properties. Only query atoms whose
    '_TriposAtomName' property starts with 'LP' (assumed to be lone pairs) are supported. A lone pair without bonds
    is bonded to the atom listed second when the atoms are sorted by 3D distance to it, and the molecule is then
    sanitized.

    :param rdkit.Chem.Mol molecule: molecule to be verified
    :param int verbosity: controls the verbosity level
    :raises SystemExit: if a query atom is not a lone pair, or if a lone pair without bonds is found in a molecule
                        with no conformers
    :rtype: rdkit.Chem.Mol
    """

    os_util.local_print('Entering process_dummy_atoms(molecule=({}; SMILES={}), verbosity={})'
                        ''.format(molecule, rdkit.Chem.MolToSmiles(molecule), verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # Iterates over a copy of molecule and convert query atoms to dummy atoms, adding bonds if necessary. The atoms
    # being iterated (and tested) belong to the copy, while edits go to molecule, which is rebound after each edit
    temp_mol = rdkit.Chem.Mol(molecule)
    for atom_idx, each_atom in enumerate(temp_mol.GetAtoms()):
        if isinstance(each_atom, rdkit.Chem.rdchem.QueryAtom):
            # Replace the query atom by a dummy atom in the molecule to be returned
            newdummy = rdkit.Chem.Atom(0)
            rdedmol = rdkit.Chem.RWMol(molecule)
            rdedmol.ReplaceAtom(atom_idx, newdummy, preserveProps=True)
            molecule = rdedmol.GetMol()

            if each_atom.GetProp('_TriposAtomName')[:2] == 'LP':
                os_util.local_print('Lone pair found. Atom with id {} was assumed a lone pair by its name ({}) and '
                                    'its type ({}). If this is wrong, please change the atom name.'
                                    ''.format(atom_idx, each_atom.GetProp('_TriposAtomName'),
                                              each_atom.GetProp('_TriposAtomType')),
                                    msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

                if each_atom.GetBonds() == ():
                    if temp_mol.GetNumConformers() == 0:
                        os_util.local_print('Disconnected lone pair atom found in a molecule with no 3D coordinates. '
                                            '3D coordinates are used to guess the LP host, but are absent in molecule '
                                            '{}. I cannot continue.'.format(temp_mol),
                                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
                        raise SystemExit(1)

                    # Note that this acts on the conformer of the copy, not on molecule
                    rdkit.Chem.rdMolTransforms.CanonicalizeConformer(temp_mol.GetConformer(0))

                    # LP is not bonded to any other atom. Connect it to the closer one
                    import numpy
                    # The return value of this call is discarded
                    temp_mol.GetConformer(0)
                    dist_list = numpy.argsort(numpy.array(rdkit.Chem.Get3DDistanceMatrix(temp_mol)[atom_idx]))
                    # Second entry is used; presumably the first one is the lone pair itself, at distance zero -
                    # TODO confirm
                    closer_atom = int(dist_list[1])

                    rdedmol = rdkit.Chem.RWMol(molecule)
                    rdedmol.AddBond(atom_idx, closer_atom)
                    molecule = rdedmol.GetMol()
                    rdkit.Chem.SanitizeMol(molecule)

                    os_util.local_print('Lonepair {} (id: {}) is not explicitly bonded to any atom in molecule, '
                                        'connecting it to the closer atom {} (id: {}). Please, check the output'
                                        ''.format(molecule.GetAtomWithIdx(atom_idx).GetProp('_TriposAtomName'),
                                                  atom_idx,
                                                  molecule.GetAtomWithIdx(closer_atom).GetProp('_TriposAtomName'),
                                                  closer_atom),
                                        msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
            else:
                # FIXME: support other dummy atoms (eg: in linear molecules)
                os_util.local_print('The molecule {} contains dummy atoms which are not lonepairs. This is not '
                                    'supported.'.format(molecule),
                                    msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
                raise SystemExit(1)

    return molecule
def verify_molecule_name(molecule, moldict, new_default_name=None, verbosity=0):
    """ Verify the a molecule name exists and is unique and return a valid name the molecule

    The name is read from the '_Name' property (rdkit molecule) or is the molecule argument itself (str). A missing
    or empty name is replaced by new_default_name or, if not supplied, by a random '(mol_<number>)' name. Should the
    name already be a key of moldict, the first free name among '<name>_1', '<name>_2', ... is used. If molecule is
    a rdkit molecule, its '_Name' property is set to the returned name.

    :param [rdkit.Chem.Mol, str] molecule: molecule to be verified
    :param dict moldict: dict of read molecules
    :param str new_default_name: if molecule lacks a name, use this name instead (Default: generate a random name)
    :param int verbosity: controls the verbosity level
    :rtype: str
    """

    is_rdkit_mol = isinstance(molecule, rdkit.Chem.Mol)

    if is_rdkit_mol:
        try:
            # An empty name cannot be used as index, so it is handled the same way as a missing name
            this_mol_name = molecule.GetProp('_Name') or None
        except KeyError:
            this_mol_name = None
    else:
        this_mol_name = molecule if molecule else None

    if this_mol_name is None:
        if new_default_name:
            this_mol_name = new_default_name
        else:
            this_mol_name = '(mol_{})'.format(numpy.random.randint(1, 999999999))
            while this_mol_name in moldict:
                this_mol_name = '(mol_{})'.format(numpy.random.randint(1, 999999999))

            if is_rdkit_mol:
                os_util.local_print('Molecule {} have no name. Molecule name is used to save molecule data '
                                    'and serves as an index. I will generate a random name for it, namely: {}'
                                    ''.format(rdkit.Chem.MolToSmiles(molecule), this_mol_name),
                                    msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
            else:
                os_util.local_print('A molecule have no name. Molecule name is used to save molecule data '
                                    'and serves as an index. I will generate a random name for it, namely: {}'
                                    ''.format(this_mol_name),
                                    msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)

    if this_mol_name in moldict:
        colliding_name = this_mol_name

        # Use a counter for the suffix. Incrementing only the last character of the name would turn x_10 into x_11
        # and x_19 into x_110.
        suffix = 1
        this_mol_name = '{}_{}'.format(colliding_name, suffix)
        while this_mol_name in moldict:
            suffix += 1
            this_mol_name = '{}_{}'.format(colliding_name, suffix)

        if is_rdkit_mol:
            os_util.local_print('Two molecules (Smiles: {} and {}) have the same name {}. Molecule name is used to '
                                'save molecule data and serves as an index. I will rename molecule {} to {}'
                                ''.format(rdkit.Chem.MolToSmiles(moldict[colliding_name]),
                                          rdkit.Chem.MolToSmiles(molecule), colliding_name,
                                          rdkit.Chem.MolToSmiles(molecule), this_mol_name),
                                msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
        else:
            os_util.local_print('Two molecules have the same name {}. Molecule name is used to '
                                'save molecule data and serves as an index. I will rename the last molecule {}'
                                ''.format(colliding_name, this_mol_name),
                                msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)

    if is_rdkit_mol:
        molecule.SetProp('_Name', this_mol_name)

    return this_mol_name
def rwmol_to_obmol(rdkit_rwmol, verbosity=0):
    """ Converts a rdkit.RWMol to openbabel.OBMol

    Atomic number, formal charge, implicit valence, aromaticity and the coordinates of the default conformer are
    copied for each atom; order and aromaticity are copied for each bond. Stereochemistry is not transferred. A
    pybel.ob.OBMol is returned unchanged, while for a pybel.Molecule its OBMol is returned.

    :param rdkit.Chem.rdchem.Mol rdkit_rwmol: the ROMol to be converted
    :param int verbosity: be verbosity
    :raises KeyError: if a bond is not single, double, triple, aromatic or unspecified
    :rtype: pybel.ob.OBMol
    """

    import pybel

    if isinstance(rdkit_rwmol, pybel.ob.OBMol):
        os_util.local_print('Molecule {} (SMILES={}) is already a pybel.ob.OBMol'
                            ''.format(rdkit_rwmol, pybel.Molecule(rdkit_rwmol).write('smi')),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        return rdkit_rwmol

    if isinstance(rdkit_rwmol, pybel.Molecule):
        # This is a pybel object, so pybel (and not rdkit) must be used to get its SMILES
        os_util.local_print('Molecule {} (SMILES={}) is already a a pybel.Molecule, converting to pybel.ob.OBMol only'
                            ''.format(rdkit_rwmol, rdkit_rwmol.write('smi')),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        return rdkit_rwmol.OBMol

    # Set some lookups. Unspecified bonds are converted to single bonds, aromatic bonds use order 5
    _bondorders = {rdkit.Chem.BondType.SINGLE: 1,
                   rdkit.Chem.rdchem.BondType.UNSPECIFIED: 1,
                   rdkit.Chem.BondType.DOUBLE: 2,
                   rdkit.Chem.BondType.TRIPLE: 3,
                   rdkit.Chem.BondType.AROMATIC: 5}

    new_obmol = pybel.ob.OBMol()
    new_obmol.BeginModify()

    # Assign atoms
    for index, each_atom in enumerate(rdkit_rwmol.GetAtoms()):
        new_atom = new_obmol.NewAtom()
        new_atom.SetAtomicNum(each_atom.GetAtomicNum())
        new_atom.SetFormalCharge(each_atom.GetFormalCharge())
        new_atom.SetImplicitValence(each_atom.GetImplicitValence())
        if each_atom.GetIsAromatic():
            new_atom.SetAromatic()
        # A single position lookup per atom
        atom_position = rdkit_rwmol.GetConformer().GetAtomPosition(index)
        new_atom.SetVector(atom_position.x, atom_position.y, atom_position.z)

    # Assign bonds. rdkit atom indices start at 0, the atom indices used here for openbabel start at 1
    for each_bond in rdkit_rwmol.GetBonds():
        begin_idx = each_bond.GetBeginAtomIdx() + 1
        end_idx = each_bond.GetEndAtomIdx() + 1
        new_obmol.AddBond(begin_idx, end_idx, _bondorders[each_bond.GetBondType()])
        if each_bond.GetIsAromatic():
            new_obmol.GetBond(begin_idx, end_idx).SetAromatic()

    # FIXME: assign stereochemistry (rdkit BondStereo STEREONONE, STEREOE and STEREOZ are currently not converted)

    new_obmol.EndModify()

    os_util.local_print('Converted rdkit molecule SMILES {} to an openbabel molecule SMILES: {}'
                        ''.format(rdkit.Chem.MolToSmiles(rdkit_rwmol), pybel.Molecule(new_obmol).write('smi')),
                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.info)

    return new_obmol
def extract_docking_poses(ligands_dict, no_checks=False, verbosity=0):
    """ Reads the pose of each ligand in ligands_dict into a rdkit molecule

    Each value in ligands_dict can be one of:
      - a str: read as a file using generic_mol_read, the format being the (lowercased) file extension;
      - an all_classes.Namespace: read using generic_mol_read from its ``data`` and ``format`` attributes;
      - a dict: its ``molecule`` key is either a rdkit.Chem.Mol, used as is, or is read using generic_mol_read. In
        the latter case, the format is taken from the ``format`` key, which is added to the dict (from the
        extension of ``molecule``) when absent;
      - a rdkit.Chem.Mol: used as is.

    Molecules successfully read are passed through mol_util.process_dummy_atoms. When reading yields None and
    no_checks is set, a str or Namespace ligand is read using pybel and converted by mol_util.obmol_to_rwmol; should
    that also fail with an OSError or StopIteration, an error is printed and the ligand is left out of the result.

    :param dict ligands_dict: dict containing docking poses
    :param bool no_checks: ignore checks and tries to go on
    :param int verbosity: be verbosity
    :raises TypeError: if a ligand is of an unsupported type
    :raises SystemExit: if a ligand cannot be read and no_checks is False
    :rtype: dict
    """

    os_util.local_print('Entering extract_docking_poses(poses_data={}, verbosity={})'
                        ''.format(ligands_dict, verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    docking_mol_local = {}
    for each_name, each_mol in ligands_dict.items():

        if isinstance(each_mol, str):
            ligand_format = splitext(each_mol)[1].lower()
            docking_mol_rd = generic_mol_read(ligand_format, each_mol, verbosity=verbosity)
        elif isinstance(each_mol, all_classes.Namespace):
            docking_mol_rd = generic_mol_read(each_mol.format, each_mol.data, verbosity=verbosity)
        elif isinstance(each_mol, dict):
            if isinstance(each_mol['molecule'], rdkit.Chem.Mol):
                docking_mol_rd = each_mol['molecule']
            else:
                # Note: this stores the guessed format in the caller's dict
                ligand_format = each_mol.setdefault('format', os.path.splitext(each_mol['molecule'])[1])
                docking_mol_rd = generic_mol_read(ligand_format, each_mol['molecule'], verbosity=verbosity)
        elif isinstance(each_mol, rdkit.Chem.Mol):
            docking_mol_rd = each_mol
        else:
            os_util.local_print("Could not understand type {} (repr: {}) for your ligand {}"
                                "".format(type(each_mol), repr(each_mol), each_name),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
            raise TypeError('Ligand must be str or all_classes.Namespace')

        if docking_mol_rd is not None:
            os_util.local_print("Read molecule {} from {}"
                                "".format(each_name, each_mol),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.info)
            docking_mol_rd = mol_util.process_dummy_atoms(docking_mol_rd, verbosity=verbosity)
            docking_mol_local[each_name] = docking_mol_rd

            os_util.local_print('{:<15} {:<18}'.format(each_name, str(each_mol)),
                                msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)
            os_util.local_print('Read molecule {} (SMILES: {}) from file {}'
                                ''.format(each_name, rdkit.Chem.MolToSmiles(docking_mol_rd), each_mol),
                                msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

        elif no_checks:
            os_util.local_print('Could not read data in {} using rdkit. Falling back to openbabel. It is strongly '
                                'advised you to check your file and convert it to a valid mol2.'
                                ''.format(str(each_mol)),
                                msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
            import pybel

            if verbosity <= 3:
                # Silence openbabel, except for errors
                pybel.ob.obErrorLog.SetOutputLevel(pybel.ob.obError)

            try:
                if isinstance(each_mol, str):
                    ligand_format = splitext(each_mol)[1].lstrip('.').lower()
                    docking_mol_ob = next(pybel.readfile(ligand_format, each_mol))
                elif isinstance(each_mol, all_classes.Namespace):
                    docking_mol_ob = pybel.readstring(each_mol.format, each_mol.data)
                else:
                    os_util.local_print("Could not understand type {} (repr: {}) for your ligand {}"
                                        "".format(type(each_mol), repr(each_mol), each_name),
                                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
                    raise TypeError('Ligand must be str or all_classes.Namespace')
            except (OSError, StopIteration) as error_data:
                # no_checks is set in this branch, so the failure is reported and the ligand is skipped
                os_util.local_print('Could not read your ligand {} from {} using rdkit nor openbabel. Please '
                                    'check/convert your ligand file. Openbabel error was: {}'
                                    ''.format(each_name, str(each_mol), error_data),
                                    msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            else:
                # Convert to rdkit and apply mol_util.process_dummy_atoms
                docking_mol_rd = mol_util.process_dummy_atoms(mol_util.obmol_to_rwmol(docking_mol_ob))
                docking_mol_local[each_name] = docking_mol_rd

                os_util.local_print('{:<15} {:<18}'
                                    ''.format(each_name,
                                              each_mol['comment'] if isinstance(each_mol, dict) else each_mol),
                                    msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)
                os_util.local_print('Extracted molecule {} (SMILES: {}) using openbabel fallback from {}.'
                                    ''.format(each_name, rdkit.Chem.MolToSmiles(docking_mol_rd), str(each_mol)),
                                    msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

        else:
            os_util.local_print('Could not read data in {} using rdkit. Please, check your file and convert it to a '
                                'valid mol2. (You can also use "no_checks" to enable reading using pybel)'
                                ''.format(str(each_mol)),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(-1)

    return docking_mol_local
def superimpose_poses(ligand_data, reference_pose_mol, save_state=None, num_threads=0, num_conformers=200,
                      verbosity=0, **kwargs):
    """ Superimposes the ligands in ligand_data onto a reference pose

    The reference pose and the ligands are read using extract_docking_poses and each ligand is then processed by
    merge_topologies.constrained_embed_shapeselect. When save_state is supplied, results stored in its
    'superimpose_data' entry are reused for ligands found there, provided that entry was created for the same
    reference_pose_mol; new results are stored in save_state and saved after each ligand.

    :param dict ligand_data: dict with the ligands
    :param str reference_pose_mol: file with reference pose to be used
    :param savestate_utils.SavableState save_state: object with saved data
    :param int num_threads: use this much threads
    :param int num_conformers: generate this much trial conformers to find a best shape match
    :param int verbosity: sets the verbosity level
    :param kwargs: passed on to merge_topologies.constrained_embed_shapeselect; 'mcs' defaults to None and
                   'superimpose_atom_map' (ligand name -> atom map) defaults to an empty dict
    :rtype: dict
    """

    os_util.local_print('Entering superimpose_poses(ligand_data={}, reference_pose_superimpose={}, save_state={}, '
                        'verbosity={}, kwargs={})'
                        ''.format(ligand_data, reference_pose_mol, save_state, verbosity, kwargs),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # Unless told otherwise: no MCS and no custom atom maps
    kwargs.setdefault('mcs', None)
    kwargs.setdefault('superimpose_atom_map', {})

    if not save_state:
        # Nothing is going to be stored, just read the reference pose
        rdkit_reference_pose = extract_docking_poses({'reference': {'molecule': reference_pose_mol}},
                                                     verbosity=verbosity)['reference']
    else:
        # Look for data from a previous run
        rdkit_reference_pose = None
        if 'superimpose_data' in save_state:
            try:
                saved_reference_pose = save_state['superimpose_data']['reference_pose_path']
            except KeyError:
                # Without a reference_pose_path, nothing in the saved entry can be trusted
                os_util.local_print('Unexpected data strucuture in {}. The entry for superimpose data is corrupted.'
                                    ' Trying to fix and going on.'
                                    ''.format(save_state.data_file),
                                    msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
            else:
                if saved_reference_pose == reference_pose_mol:
                    # Same reference pose as in the previous run, the saved data can be used
                    rdkit_reference_pose = save_state['superimpose_data']['reference_pose_superimpose']
                    no_saved_ligands = len(save_state['superimpose_data']['ligand_dict']) == 0
                    if no_saved_ligands and verbosity > 0:
                        os_util.local_print('No ligand poses were saved from previous run in file {}. I found a entry '
                                            'for superimpose data, but it is empty.'
                                            ''.format(save_state.data_file),
                                            msg_verbosity=os_util.verbosity_level.warning,
                                            current_verbosity=verbosity)
                        rdkit_reference_pose = None

        if rdkit_reference_pose is None:
            # Start a fresh superimpose_data entry, also kept under a timestamped name
            from time import strftime
            backup_name = 'superimpose_data_{}'.format(strftime('%d%m%Y_%H%M%S'))
            save_state['superimpose_data'] = {}
            save_state['superimpose_data']['reference_pose_path'] = reference_pose_mol
            rdkit_reference_pose = extract_docking_poses({'reference': {'molecule': reference_pose_mol}},
                                                         verbosity=verbosity)['reference']
            save_state['superimpose_data']['reference_pose_superimpose'] = rdkit_reference_pose
            save_state['superimpose_data']['ligand_dict'] = {}
            save_state[backup_name] = save_state['superimpose_data']

            # Store what was done so far
            save_state.save_data()

    if not rdkit_reference_pose.HasProp('_Name'):
        rdkit_reference_pose.SetProp('_Name', '<Superimpose reference pose>')

    # Read the ligands
    docking_poses_data = extract_docking_poses(ligand_data, verbosity=verbosity)

    new_docking_poses_data = {}
    os_util.local_print('{:=^50}\n{:<15} {:<25} {:<15}'
                        ''.format(' Superimposed poses ', 'Name', 'File', 'Note'),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    for ligand_name, each_ligand_mol in docking_poses_data.items():
        if save_state:
            # Reuse the result of a previous run, if there is one for this ligand
            try:
                new_docking_poses_data[ligand_name] = save_state['superimpose_data']['ligand_dict'][ligand_name]
            except KeyError:
                os_util.local_print('Could not find data for ligand {} in {}'
                                    ''.format(ligand_name, save_state.data_file),
                                    msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)
            else:
                os_util.local_print('{:<15} {:<25} {:<15}'
                                    ''.format(ligand_name, str(ligand_data[ligand_name]), 'Read from saved state'),
                                    msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)
                continue

        # Use the custom atom map supplied for this ligand, if any
        custom_atom_map = kwargs['superimpose_atom_map'].get(ligand_name)
        superimposed_mol = merge_topologies.constrained_embed_shapeselect(each_ligand_mol, rdkit_reference_pose,
                                                                          num_threads=num_threads,
                                                                          save_state=save_state,
                                                                          num_conformers=num_conformers,
                                                                          verbosity=verbosity,
                                                                          atom_map=custom_atom_map, **kwargs)
        new_docking_poses_data[ligand_name] = superimposed_mol

        if save_state:
            # Store the rdkit Mol right away
            save_state['superimpose_data']['ligand_dict'][ligand_name] = superimposed_mol
            save_state.save_data()

        os_util.local_print('{:<15} {:<15}'.format(ligand_name, str(ligand_data[ligand_name])),
                            msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    os_util.local_print('=' * 50,
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    return new_docking_poses_data
def update_pertubation_image(self, mol_a_name, mol_b_name, core_smarts=None, save=False, verbosity=0, **kwargs):
    """ Generate mol images describing a pertubation between the ligand pair

    For each molecule of the pair, two SVG depictions (with and without hydrogens) are drawn with the atoms not
    matched by the common core highlighted. They are stored in
    self.ligands_data[<name>]['images']['perturbations'][<other name>][core_smarts], under the keys '2d_hs' and
    '2d_nohs'. Nothing is done if non-empty entries already exist for both molecules.

    :param str mol_a_name: name of the molecule A
    :param str mol_b_name: name of the molecule B
    :param str core_smarts: use this smarts as common core; if None, it is obtained from merge_topologies.find_mcs
    :param bool save: automatically save data
    :param int verbosity: controls verbosity level
    :param kwargs: passed on to merge_topologies.find_mcs
    :rtype: None
    """

    self.ligands_data[mol_a_name].setdefault('images', {})
    self.ligands_data[mol_a_name]['images'].setdefault('perturbations', {})
    self.ligands_data[mol_b_name].setdefault('images', {})
    self.ligands_data[mol_b_name]['images'].setdefault('perturbations', {})

    import rdkit.Chem
    # Work on copies, so the stored molecules are not altered
    this_mol_a = rdkit.Chem.Mol(self.ligands_data[mol_a_name]['molecule'])
    this_mol_b = rdkit.Chem.Mol(self.ligands_data[mol_b_name]['molecule'])

    if core_smarts is None:
        # Get core_smarts using find_mcs
        from merge_topologies import find_mcs
        this_mol_a.RemoveAllConformers()
        this_mol_b.RemoveAllConformers()
        core_smarts = find_mcs([this_mol_a, this_mol_b], savestate=self, verbosity=verbosity,
                               **kwargs).smartsString

    # Test whether the images are already present. An explicit test is used, rather than assert statements,
    # because asserts are removed when Python runs with -O
    try:
        images_present = (
            len(self.ligands_data[mol_a_name]['images']['perturbations'][mol_b_name][core_smarts]) > 0
            and len(self.ligands_data[mol_b_name]['images']['perturbations'][mol_a_name][core_smarts]) > 0)
    except KeyError:
        images_present = False

    if images_present:
        return None

    os_util.local_print('Perturbation images for molecules {} and {} with common core "{}" were not found. '
                        'Generating it.'.format(mol_a_name, mol_b_name, core_smarts),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    from rdkit.Chem.Draw import MolDraw2DSVG
    from rdkit.Chem.AllChem import Compute2DCoords, GenerateDepictionMatching2DStructure

    core_mol = rdkit.Chem.MolFromSmarts(core_smarts)
    Compute2DCoords(core_mol)

    for each_name, each_mol, other_mol in zip([mol_a_name, mol_b_name],
                                              [this_mol_a, this_mol_b],
                                              [mol_b_name, mol_a_name]):
        GenerateDepictionMatching2DStructure(each_mol, core_mol, acceptFailure=True)

        # Draw mol with hydrogens. The substructure match is computed once, not once per atom
        draw_2d_svg = MolDraw2DSVG(300, 150)
        draw_2d_svg.drawOptions().addStereoAnnotation = True
        core_atoms = set(each_mol.GetSubstructMatch(core_mol))
        not_common_atoms = [each_atom.GetIdx() for each_atom in each_mol.GetAtoms()
                            if each_atom.GetIdx() not in core_atoms]
        draw_2d_svg.DrawMolecule(each_mol, legend=each_name, highlightAtoms=not_common_atoms)
        draw_2d_svg.FinishDrawing()
        svg_data_hs = draw_2d_svg.GetDrawingText()

        # Draw mol without hydrogens
        draw_2d_svg = MolDraw2DSVG(300, 150)
        draw_2d_svg.drawOptions().addStereoAnnotation = True
        each_mol = rdkit.Chem.RemoveHs(each_mol)
        core_atoms = set(each_mol.GetSubstructMatch(rdkit.Chem.RemoveHs(core_mol)))
        not_common_atoms = [each_atom.GetIdx() for each_atom in each_mol.GetAtoms()
                            if each_atom.GetIdx() not in core_atoms]
        draw_2d_svg.DrawMolecule(each_mol, legend=each_name, highlightAtoms=not_common_atoms)
        draw_2d_svg.FinishDrawing()
        svg_data_nohs = draw_2d_svg.GetDrawingText()

        perturbation_imgs = self.ligands_data[each_name]['images']['perturbations']
        perturbation_imgs.setdefault(other_mol, {})[core_smarts] = {'2d_hs': svg_data_hs,
                                                                    '2d_nohs': svg_data_nohs}

    if save:
        self.save_data()
def align_protein(mobile_mol, reference_mol, align_method='openbabel', seq_align_mat='BLOSUM80', gap_penalty=-1,
                  verbosity=0):
    """ Align mobile_mol to reference_mol using method defined in align_method. Defaults to openbabel.OBAlign, which is
    fastest. rdkit's GetAlignmentTransform is much slower and may not work on larger systems.

    Only the 'openbabel' method is implemented; 'rdkit' converts both molecules and then raises
    NotImplementedError. The returned dict holds the transformation to be applied to the mobile molecule, under
    the keys 'centering_vector' and 'translation_vector' (pybel.ob.vector3) and 'rotation_matrix' (list of the
    nine elements of the rotation matrix, row by row).

    :param [rdkit.RWMol, pybel.Molecule] reference_mol: molecule to be used as alignment reference
    :param [rdkit.RWMol, pybel.Molecule] mobile_mol: rdkit.RWMol molecule to be aligned
    :param str align_method: method to be used, options are 'openbabel', 'rdkit'
    :param str seq_align_mat: use this matrix to sequence alignment, only used if sequences differ. Any value from
                              Bio.SubsMat.MatrixInfo
    :param float gap_penalty: use this gap penalty to sequence alignment, only used if sequences differ.
    :param int verbosity: be verbosity
    :raises SystemExit: if a molecule cannot be converted (rdkit method) or the alignment fails
    :raises NotImplementedError: if align_method is 'rdkit'
    :raises ValueError: if align_method is unknown
    :rtype: dict
    """

    os_util.local_print('Entering align_protein(mobile_mol={}, reference_mol={}, align_method={}, verbosity={})'
                        ''.format(mobile_mol.title, reference_mol.title, align_method, verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    if align_method == 'rdkit':
        reference_mol_rwmol = obmol_to_rwmol(reference_mol)
        if reference_mol_rwmol is None:
            os_util.local_print('Could not internally convert reference_mol',
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            if verbosity >= os_util.verbosity_level.info:
                os_util.local_print('Dumping data to receptor_mol_error.pdb',
                                    msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)
                # Write PDB data, matching the file name, and do not fail because of a dump from a previous run
                reference_mol.write('pdb', 'receptor_mol_error.pdb', overwrite=True)
            raise SystemExit(1)

        mobile_mol_rwmol = obmol_to_rwmol(mobile_mol)
        if mobile_mol_rwmol is None:
            os_util.local_print('Could not internally convert OpenBabel mobile_mol to a RDKit.Chem.Mol object.',
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        os_util.local_print('Done reading and converting reference_mol {} and mobile_mol {}'
                            ''.format(reference_mol_rwmol.GetProp('_Name'), mobile_mol_rwmol.GetProp('_Name')),
                            msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

        # FIXME: implement this using rdkit.Chem.rdMolAlign.GetAlignmentTransform
        raise NotImplementedError('rdkit aligment method not implemented')

    elif align_method == 'openbabel':
        # FIXME: implement a Biopython-only method
        from openbabel import OBAlign
        import pybel

        # Drop the fasta header line and join the sequence lines
        reference_mol_seq = reference_mol.write('fasta').split('\n', 1)[1].replace('\n', '')
        mobile_mol_seq = mobile_mol.write('fasta').split('\n', 1)[1].replace('\n', '')

        if reference_mol_seq != mobile_mol_seq:
            os_util.local_print('Aminoacid sequences of {} and {} differs:\nReference: {}\nMobile: {}'
                                ''.format(reference_mol.title, mobile_mol.title, reference_mol_seq, mobile_mol_seq),
                                msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)
            mob_align_str, ref_align_str = align_sequences_match_residues(mobile_mol_seq, reference_mol_seq,
                                                                          seq_align_mat=seq_align_mat,
                                                                          gap_penalty=gap_penalty,
                                                                          verbosity=verbosity)
        else:
            ref_align_str = None
            mob_align_str = None

        # Get the coordinates of the selected atoms of both proteins
        ref_atom_vec = get_position_matrix(reference_mol, ref_align_str)
        reference_mol_vec = pybel.ob.vectorVector3(ref_atom_vec)
        mob_atom_vec = get_position_matrix(mobile_mol, mob_align_str)
        mobile_mol_vec = pybel.ob.vectorVector3(mob_atom_vec)

        os_util.local_print('Done extracting Ca from {} and {}'.format(reference_mol.title, mobile_mol.title),
                            msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

        # Align mobile to reference using the Ca coordinates
        align_obj = OBAlign(reference_mol_vec, mobile_mol_vec)
        if not align_obj.Align():
            os_util.local_print('Failed to align mobile_mol {} to reference_mol {}'
                                ''.format(mobile_mol.title, reference_mol.title),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        os_util.local_print('Alignment RMSD is {}'.format(align_obj.GetRMSD()),
                            msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

        # Prepare translation and rotation matrices
        reference_mol_center = numpy.array([[a.GetX(), a.GetY(), a.GetZ()] for a in reference_mol_vec]).mean(0)
        mobile_mol_center = numpy.array([[a.GetX(), a.GetY(), a.GetZ()] for a in mobile_mol_vec]).mean(0)
        translation_vector = pybel.ob.vector3(*reference_mol_center.tolist())
        centering_vector = pybel.ob.vector3(*(-mobile_mol_center).tolist())
        rot_matrix = align_obj.GetRotMatrix()
        rot_vector_1d = [rot_matrix.Get(i, j) for i in range(3) for j in range(3)]

        os_util.local_print('Alignment data:\n\tReference: {}\n\tMobile: {}\n\tCentering: {}\n\tTranslation: {}'
                            '\n\tRotation matrix:\n\t\t{}, {}, {}\n\t\t{}, {}, {}\n\t\t{}, {}, {}'
                            ''.format(reference_mol_center, mobile_mol_center, centering_vector,
                                      translation_vector, *rot_vector_1d),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)

        return {'centering_vector': centering_vector,
                'translation_vector': translation_vector,
                'rotation_matrix': rot_vector_1d}

    else:
        # TODO implement a internal alignment method
        os_util.local_print('Unknown alignment method {}. Currently, only "openbabel" is allowed.'
                            .format(align_method),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise ValueError('Unknown alignment method {}.'.format(align_method))
def extract_pdb_poses(poses_data, reference_structure, ligand_residue_name='LIG', verbosity=0, **kwargs):
    """ Extracts the ligand from each receptor-ligand file and moves it to the frame of reference_structure

    Each file is read using pybel, the first residue named ligand_residue_name is taken as the ligand and the
    transformation obtained from align_protein (file contents as mobile, reference_structure as reference) is
    applied to it.

    :param dict poses_data: dict with the files bearing the poses and the receptor, potentially in a different
                            orientation and conformation
    :param pybel.Molecule reference_structure: structure used as alignment reference
    :param str ligand_residue_name: the residues name of the ligand
    :param int verbosity: sets verbosity level
    :param kwargs: 'seq_align_mat' (default: 'BLOSUM80') and 'gap_penalty' (default: -1) are passed on to
                   align_protein
    :raises SystemExit: if a file has an unsupported format, cannot be read or does not contain the ligand residue
    :rtype: dict
    """

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    # Iterate over key-value pairs; iterating over the dict itself would yield only the keys. The matrix name
    # follows the default of align_protein
    for k, v in {'seq_align_mat': 'BLOSUM80', 'gap_penalty': -1}.items():
        kwargs.setdefault(k, v)

    docking_mol_local = {}

    # Iterate over the dict, reading the poses
    for ligand_name, ligand_dict in poses_data.items():
        receptor_format = ligand_dict.split('.')[-1]
        if receptor_format == 'pdbqt':
            receptor_format = 'pdb'

        # pdb and mol2 fills OBResidue, does any other format file do? If so, we have to add it to this list
        if receptor_format not in ['pdb', 'mol2']:
            os_util.local_print('Using pdb_loader requires a pdb or a mol2 file, but you supplied {}. Try using '
                                'generic_loader or converting your input files'.format(ligand_dict),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        try:
            this_pdb_data = next(pybel.readfile(receptor_format, ligand_dict))
        except (IOError, StopIteration) as error_data:
            # StopIteration: the file was opened, but no molecule could be read from it
            os_util.local_print('Could not read {}. Error: {}'.format(ligand_dict, error_data),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        # Iterates over all residues looking for ligand_residue_name. Note: this will select the first residue
        # named ligand_residue_name.
        lig_residue = None
        for each_res in pybel.ob.OBResidueIter(this_pdb_data.OBMol):
            if each_res.GetName() == ligand_residue_name:
                lig_residue = each_res
                break
        else:
            # For was not break, we did not find the ligand residue
            os_util.local_print('Could not find ligand molecule {} in file {}\nI have read the following '
                                'residues: {}\n'
                                ''.format(ligand_name, ligand_dict,
                                          ', '.join([this_pdb_data.OBMol.GetResidue(i).GetName()
                                                     for i in range(this_pdb_data.OBMol.NumResidues())])),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        # Delete, from a copy of the system, every atom not in the ligand residue. The ligand atom ids are
        # collected once in a set. Deletion goes from the last atom to the first, so the remaining ids stay valid
        ligand_atom_ids = {atom_in_res.GetIdx() for atom_in_res in pybel.ob.OBResidueAtomIter(lig_residue)}
        dellist = [each_atom.GetIdx() for each_atom in pybel.ob.OBMolAtomIter(this_pdb_data.OBMol)
                   if each_atom.GetIdx() not in ligand_atom_ids]
        ligand_ob_molecule = pybel.ob.OBMol(this_pdb_data.OBMol)
        for atom_idx in reversed(dellist):
            ligand_ob_molecule.DeleteAtom(ligand_ob_molecule.GetAtom(atom_idx))

        docking_mol_local[ligand_name] = ligand_ob_molecule

        # Apply to the ligand the transformation which aligns its receptor to the reference structure
        align_data = align_protein(this_pdb_data, reference_structure, seq_align_mat=kwargs['seq_align_mat'],
                                   gap_penalty=kwargs['gap_penalty'], verbosity=verbosity)
        docking_mol_local[ligand_name].Translate(align_data['centering_vector'])
        docking_mol_local[ligand_name].Rotate(pybel.ob.double_array(align_data['rotation_matrix']))
        docking_mol_local[ligand_name].Translate(align_data['translation_vector'])

        os_util.local_print('{:<15} {:<20}\n'.format(ligand_name, ligand_dict),
                            msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    return docking_mol_local
def obmol_to_rwmol(openbabel_obmol, verbosity=0):
    """Converts a openbabel.OBMol to rdkit.RWMol

    Atoms (element, formal charge, aromaticity and, when available, atom name), bonds (order and aromaticity),
    coordinates, molecule data and title are copied. Massless atoms named LP* or XX* (or any massless atom, when
    atom names cannot be read) are converted to dummy atoms and their bonds are set to UNSPECIFIED. Atoms without
    any bond are bonded to the closest atom. Stereochemistry is not transferred.

    Parameters
    ----------
    openbabel_obmol : pybel.ob.OBMol
        The OBMol to be converted. A pybel.Molecule is also accepted; a rdkit.Chem.Mol is returned unchanged,
        with a warning
    verbosity : int
        Sets verbosity level

    Returns
    -------
    rdkit.Chem.Mol
        Converted molecule

    Raises
    ------
    ValueError
        If the input is of an unsupported type or if coordinates cannot be generated for the converted molecule
    """

    import numpy
    import pybel

    if isinstance(openbabel_obmol, rdkit.Chem.Mol):
        os_util.local_print('Entering obmol_to_rwmol. Molecule {} (Props: {}) is already a rdkit.Chem.Mol object!'
                            ''.format(openbabel_obmol, openbabel_obmol.GetPropsAsDict()),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        return openbabel_obmol
    elif isinstance(openbabel_obmol, pybel.Molecule):
        openbabel_obmol = openbabel_obmol.OBMol
    elif not isinstance(openbabel_obmol, pybel.ob.OBMol):
        os_util.local_print('Entering obmol_to_rwmol. Molecule {} is a {}, but pybel.Molecule or pybel.ob.OBMol '
                            'required.'
                            ''.format(openbabel_obmol, type(openbabel_obmol)),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise ValueError('pybel.Molecule or pybel.ob.OBMol expected, got {} instead'.format(type(openbabel_obmol)))

    # openbabel bond order -> rdkit bond type
    _bondtypes = {0: rdkit.Chem.BondType.UNSPECIFIED,
                  1: rdkit.Chem.BondType.SINGLE,
                  2: rdkit.Chem.BondType.DOUBLE,
                  3: rdkit.Chem.BondType.TRIPLE,
                  5: rdkit.Chem.BondType.AROMATIC}

    rdedmol = rdkit.Chem.RWMol(rdkit.Chem.Mol())

    # Use pybel write to trigger residue data evaluation, otherwise we get and StopIteration error
    pybel.Molecule(openbabel_obmol).write('pdb')

    # Atom names are read from the first residue of the molecule
    try:
        first_residue = next(pybel.ob.OBResidueIter(openbabel_obmol))
    except StopIteration:
        os_util.local_print('Could not read atom names from molecule "{}" (Smiles: {})'
                            ''.format(openbabel_obmol.GetTitle(), pybel.Molecule(openbabel_obmol).write('smi')),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)
        first_residue = None

    # Assign atoms
    dummy_atoms = set()
    for index, each_atom in enumerate(pybel.ob.OBMolAtomIter(openbabel_obmol)):
        # The atom name is None when no residue data is available
        atom_name = first_residue.GetAtomID(each_atom) if first_residue is not None else None
        is_massless = each_atom.GetAtomicMass() == 0

        if atom_name is not None and atom_name[0:2].upper() in ['LP', 'XX'] and is_massless:
            dummy_atoms.add(index)
            rdatom = rdkit.Chem.MolFromSmarts('*').GetAtomWithIdx(0)
            os_util.local_print('Atom {} was detected as a lone pair because of its name {} and its mass {}'
                                ''.format(index, atom_name, each_atom.GetAtomicMass()),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.info)
        elif atom_name is None and is_massless:
            dummy_atoms.add(index)
            rdatom = rdkit.Chem.MolFromSmarts('*').GetAtomWithIdx(0)
            os_util.local_print('Atom {} was detected as a lone pair because of its mass {} (Note: it was not possible '
                                'to read atom name)'
                                ''.format(index, each_atom.GetAtomicMass()),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.info)
        else:
            rdatom = rdkit.Chem.Atom(each_atom.GetAtomicNum())

        new_atom = rdedmol.GetAtomWithIdx(rdedmol.AddAtom(rdatom))
        new_atom.SetFormalCharge(each_atom.GetFormalCharge())
        if atom_name is not None:
            # The name is a property of the atom, not of the molecule
            new_atom.SetProp('_TriposAtomName', atom_name)
        if each_atom.IsAromatic():
            new_atom.SetIsAromatic(True)

    os_util.local_print('[DEBUG] These are the dummy atoms detected: dummy_atoms={}'.format(dummy_atoms),
                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)

    # Assign bonds. openbabel atom indices are 1-based, rdkit's are 0-based
    for each_bond in pybel.ob.OBMolBondIter(openbabel_obmol):
        begin_idx = each_bond.GetBeginAtomIdx() - 1
        end_idx = each_bond.GetEndAtomIdx() - 1
        rdedmol.AddBond(begin_idx, end_idx, _bondtypes[each_bond.GetBondOrder()])
        if each_bond.IsAromatic():
            rdedmol.GetBondBetweenAtoms(begin_idx, end_idx).SetIsAromatic(True)
            rdedmol.GetBondBetweenAtoms(begin_idx, end_idx).SetBondType(_bondtypes[5])

        # This bond contains a dummy atom, converting bond to a UNSPECIFIED
        if dummy_atoms.intersection({begin_idx, end_idx}):
            rdedmol.GetBondBetweenAtoms(begin_idx, end_idx).SetBondType(_bondtypes[0])
            os_util.local_print('Bond between atoms {} and {} converted to an UNSPECIFIED type'
                                ''.format(begin_idx, end_idx),
                                current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)

    # FIXME: assign stereochemistry
    rdmol = rdedmol.GetMol()

    # Copy coordinates, first generate at least one conformer
    rdkit.Chem.AllChem.EmbedMolecule(rdmol, useRandomCoords=True, maxAttempts=1000, enforceChirality=True,
                                     ignoreSmoothingFailures=True)
    if rdmol.GetNumConformers() != 1:
        os_util.local_print('Failed to generate coordinates to molecule',
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise ValueError('Failed to generate coordinates to molecule')

    for atom_rdkit, atom_obmol in zip(rdmol.GetAtoms(), pybel.ob.OBMolAtomIter(openbabel_obmol)):
        this_position = rdkit.Geometry.rdGeometry.Point3D()
        this_position.x = atom_obmol.x()
        this_position.y = atom_obmol.y()
        this_position.z = atom_obmol.z()
        rdmol.GetConformer().SetAtomPosition(atom_rdkit.GetIdx(), this_position)

    # Copy data
    for k, v in pybel.MoleculeData(openbabel_obmol).items():
        rdmol.SetProp(k, v)
    rdmol.SetProp('_Name', openbabel_obmol.GetTitle())

    # Connect atoms without bonds to their closest atom. Atoms are looked up in the current molecule, so a bond
    # added in a previous iteration is taken into account and is not added twice
    for atom_idx in range(rdmol.GetNumAtoms()):
        if len(rdmol.GetAtomWithIdx(atom_idx).GetBonds()) > 0:
            continue
        if rdmol.GetNumAtoms() < 2:
            # A single-atom molecule has no other atom to be bonded to
            break

        # Index 0 of the sorted distances is expected to be the atom itself
        dist_list = numpy.argsort(numpy.array(rdkit.Chem.AllChem.Get3DDistanceMatrix(rdmol)[atom_idx]))
        closer_atom = int(dist_list[1])
        rdedmol = rdkit.Chem.RWMol(rdmol)
        rdedmol.AddBond(atom_idx, closer_atom)
        rdmol = rdedmol.GetMol()
        rdkit.Chem.SanitizeMol(rdmol)

        os_util.local_print('Atom id: {} is not explicitly bonded to any atom in molecule, connecting it to the closer '
                            'atom id: {}'.format(atom_idx, closer_atom),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.warning)

    rdkit.Chem.SanitizeMol(rdmol)

    # Report the SMILES of the molecule actually returned
    os_util.local_print("obmol_to_rwmol converted molecule {} (name: {}). Pybel SMILES: {} to rdkit SMILES: {}"
                        "".format(openbabel_obmol, openbabel_obmol.GetTitle(),
                                  pybel.Molecule(openbabel_obmol).write('smi'), rdkit.Chem.MolToSmiles(rdmol)),
                        current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.debug)

    return rdmol
'Minimum number of runs when all edges can be removed (Default: off)') optimal_opts.add_argument( '--optimal_permanent_edge_threshold', type=float, default=None, help='Edges with this much pheromone become static (Default: off)') process_user_input.add_argparse_global_args(Parser) arguments = process_user_input.read_options( Parser, unpack_section='generate_perturbation_map') progress_data = savestate_util.SavableState(arguments.progress_file) if arguments.input is None: os_util.local_print( 'No input files were provided. Please, do so by using --input or input option in your ' 'configuration file', msg_verbosity=os_util.verbosity_level.error, current_verbosity=arguments.verbose) raise SystemExit(1) if isinstance(arguments.map_communication_frequency, int) and arguments.map_communication_frequency > 0: comm_freq = arguments.map_communication_frequency elif arguments.map_type != 'star': os_util.local_print( 'Could not understand communication frequency (map_communication_frequency) value ' '{}. Value must be a positive integer.' ''.format(arguments.map_communication_frequency), msg_verbosity=os_util.verbosity_level.error, current_verbosity=arguments.verbose) raise SystemExit(1)
def extract_pdb_poses(poses_data, reference_structure, ligand_residue_name='LIG', save_state=None, verbosity=0,
                      **kwargs):
    """ Extracts the ligand from each receptor-ligand file and moves it to the frame of reference_structure

    Each file is read using pybel, the first residue named ligand_residue_name is taken as the ligand and the
    transformation obtained from align_protein (file contents as mobile, reference_structure as reference) is
    applied to it. When save_state is supplied, poses stored in its 'pdbpose_data' entry are reused, provided
    that entry was created for the same reference file (reference_structure.data['file_path']); at the end, all
    poses are converted by obmol_to_rwmol and stored in save_state.

    :param dict poses_data: dict with the files bearing the poses and the receptor, potentially in a different
                            orientation and conformation
    :param pybel.Molecule reference_structure: structure used as alignment reference
    :param str ligand_residue_name: the residues name of the ligand
    :param savestate_utils.SavableState save_state: object with saved data
    :param int verbosity: sets verbosity level
    :param kwargs: 'seq_align_mat' (default: 'BLOSUM80') and 'gap_penalty' (default: -1) are passed on to
                   align_protein
    :raises SystemExit: if a file has an unsupported format, cannot be read or does not contain the ligand residue
    :rtype: dict
    """

    os_util.local_print('{:=^50}\n{:<15} {:<20}'.format(' Poses read ', 'Name', 'File'),
                        msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    # Test for data from a previous run
    saved_pose_data = {}
    if save_state:
        if 'pdbpose_data' in save_state:
            try:
                saved_reference_structure = save_state['pdbpose_data']['reference_pose_path']
            except KeyError:
                # Incorrect behavior, there is no reference_pose_path, so we cannot trust in save_state data at all
                os_util.local_print('Unexpected data strucuture in {}. The entry for PDB pose data is corrupted.'
                                    ' Trying to fix and going on.'
                                    ''.format(save_state.data_file),
                                    msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
            else:
                if (reference_structure.data['file_path']
                        and saved_reference_structure == reference_structure.data['file_path']):
                    # Reference pose is the same, we can use the data
                    if len(save_state['pdbpose_data']['ligand_dict']) == 0:
                        os_util.local_print('No ligand poses were saved from previous run in file {}. I found a entry '
                                            'for pdb pose data, but it is empty.'
                                            ''.format(save_state.data_file),
                                            msg_verbosity=os_util.verbosity_level.warning,
                                            current_verbosity=verbosity)
                    else:
                        os_util.local_print('Reading poses data from {}.'.format(save_state.data_file),
                                            msg_verbosity=os_util.verbosity_level.warning,
                                            current_verbosity=verbosity)
                        saved_pose_data = save_state['pdbpose_data']['ligand_dict']
                else:
                    os_util.local_print('PDB poses data from {} was created for reference file {}, while this run uses '
                                        '{} as reference file. Cannot use saved data.'
                                        ''.format(save_state.data_file, saved_reference_structure,
                                                  reference_structure.data['file_path']),
                                        msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)

    for k, v in {'seq_align_mat': 'BLOSUM80', 'gap_penalty': -1}.items():
        kwargs.setdefault(k, v)

    docking_mol_local = {}

    # Iterate over the dict, reading the poses
    for ligand_name, ligand_dict in poses_data.items():

        # Try to load the ligand data from saved state
        try:
            docking_mol_local[ligand_name] = saved_pose_data[ligand_name]
        except KeyError:
            pass
        else:
            os_util.local_print('Readed {} pose from {}'.format(ligand_name, save_state.data_file),
                                msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)
            continue

        receptor_format = ligand_dict.split('.')[-1]
        if receptor_format == 'pdbqt':
            receptor_format = 'pdb'

        # pdb and mol2 fills OBResidue, does any other format file do? If so, we have to add it to this list
        if receptor_format not in ['pdb', 'mol2']:
            os_util.local_print('Using pdb_loader requires a pdb or a mol2 file, but you supplied {}. Try using '
                                'generic_loader or converting your input files'.format(ligand_dict),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        try:
            this_pdb_data = next(pybel.readfile(receptor_format, ligand_dict))
        except (IOError, StopIteration) as error_data:
            # StopIteration: the file was opened, but no molecule could be read from it
            os_util.local_print('Could not read {}. Error: {}'.format(ligand_dict, error_data),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        # Iterates over all residues looking for ligand_residue_name. Note: this will select the first residue
        # named ligand_residue_name.
        lig_residue = None
        for each_res in pybel.ob.OBResidueIter(this_pdb_data.OBMol):
            if each_res.GetName() == ligand_residue_name:
                lig_residue = each_res
                break
        else:
            # For was not broken, we did not find the ligand residue. Report the residue name which was searched
            # for (lig_residue is always None at this point)
            os_util.local_print('Could not find ligand molecule {} in file {} using the residue name {}. I have '
                                'read the following residues: {}\n'
                                ''.format(ligand_name, ligand_dict, ligand_residue_name,
                                          ', '.join([this_pdb_data.OBMol.GetResidue(i).GetName()
                                                     for i in range(this_pdb_data.OBMol.NumResidues())])),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        # Delete, from a copy of the system, every atom not in the ligand residue. The ligand atom ids are
        # collected once in a set. Deletion goes from the last atom to the first, so the remaining ids stay valid
        ligand_atom_ids = {atom_in_res.GetIdx() for atom_in_res in pybel.ob.OBResidueAtomIter(lig_residue)}
        dellist = [each_atom.GetIdx() for each_atom in pybel.ob.OBMolAtomIter(this_pdb_data.OBMol)
                   if each_atom.GetIdx() not in ligand_atom_ids]
        ligand_ob_molecule = pybel.ob.OBMol(this_pdb_data.OBMol)
        for atom_idx in reversed(dellist):
            ligand_ob_molecule.DeleteAtom(ligand_ob_molecule.GetAtom(atom_idx))

        docking_mol_local[ligand_name] = ligand_ob_molecule

        # Apply to the ligand the transformation which aligns its receptor to the reference structure
        align_data = align_protein(this_pdb_data, reference_structure, seq_align_mat=kwargs['seq_align_mat'],
                                   gap_penalty=kwargs['gap_penalty'], verbosity=verbosity)
        docking_mol_local[ligand_name].Translate(align_data['centering_vector'])
        docking_mol_local[ligand_name].Rotate(pybel.ob.double_array(align_data['rotation_matrix']))
        docking_mol_local[ligand_name].Translate(align_data['translation_vector'])

        os_util.local_print('{:<15} {:<20}'.format(ligand_name, ligand_dict),
                            msg_verbosity=os_util.verbosity_level.default, current_verbosity=verbosity)

    if save_state:
        # Poses are stored as rdkit PropertyMol, also under a timestamped backup entry
        save_dict = {'reference_pose_path': reference_structure.data['file_path'],
                     'ligand_dict': {k: rdkit.Chem.PropertyMol.PropertyMol(obmol_to_rwmol(v))
                                     for k, v in docking_mol_local.items()}}
        save_state['pdbpose_data'] = save_dict
        save_state['pdbpose_data_{}'.format(strftime('%d%m%Y_%H%M%S'))] = save_dict.copy()
        save_state.save_data()

    return docking_mol_local
def run_workers(ant_colony, n_runs=-1, n_threads=1, elitism=-1, comm_freq=20, verbosity=0):
    """ Run optimization using multiprocessing

    The optimization is split in rounds. In each round, every worker runs comm_freq ants (a "hive"); then the
    pheromone is evaporated (except after the first round), the solutions are stored in ant_colony.solutions and the
    best ants of each hive deposit pheromone. If n_runs is not a multiple of comm_freq * workers, the remaining ants
    are run at the end in a single call.

    :param all_classes.AntSolver ant_colony: optimizing object
    :param int n_runs: number of optimization ants. Default: -1 = automatically determine
    :param int n_threads: number of threads; -1 runs the serial (non-multiprocessing) code with a single hive per
                          round
    :param int elitism: use this many best ants for each parallel run to update pheromone matrix (default: -1: use
                        all). A value between 0 and 1 is read as a ratio and converted to an integer count
    :param int comm_freq: communicate between threads this often
    :param int verbosity: sets verbosity level
    """

    # The serial code path (n_threads == -1) behaves as a single worker. Using n_threads itself in the arithmetic
    # below would yield negative round counts and negative elitism values.
    n_workers = 1 if n_threads == -1 else n_threads

    if n_runs == -1:
        # Automatically setting n_runs
        n_runs = n_workers * comm_freq * 20

    n_rounds = int(n_runs / (comm_freq * n_workers))

    if 0 < elitism < 1:
        # Elitism was supplied as ratio, convert to int. Clamp to 1, because 0 would prevent any ant from
        # depositing pheromone during the rounds.
        elitism = max(1, int(n_runs / (comm_freq * n_workers) * elitism))

    if n_threads == -1:
        os_util.local_print('You are using non-threaded code (ie: threads = -1). The implementation of the ACO '
                            'algorithm is slightly different when using the non-threaded code. This should only be '
                            'used for developing purposes.',
                            msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
        for run_n in range(n_rounds):
            os_util.local_print('Optimization round {} out of {}'
                                ''.format(run_n + 1, n_rounds),
                                msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

            # Run a single hive. Note: itertools.repeat(comm_freq, times=-1) would be empty, so the hive is called
            # directly instead of being mapped over n_threads repetitions.
            results_list = [ant_colony.run_multi_ants(comm_freq)]
            _aggregate_round_results(ant_colony, results_list, elitism, evaporate=run_n > 0)
    else:
        with multiprocessing.Pool(n_threads) as thread_pool:
            for run_n in range(n_rounds):
                os_util.local_print('Optimization round {} out of {}'
                                    ''.format(run_n + 1, n_rounds),
                                    msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

                # Run n_threads parallel hives
                results_list = list(thread_pool.map(ant_colony.run_multi_ants,
                                                    itertools.repeat(comm_freq, times=n_threads)))
                _aggregate_round_results(ant_colony, results_list, elitism, evaporate=run_n > 0)

    # Finish running workers (in case n_runs is not a multiple of comm_freq * n_threads)
    if len(ant_colony.solutions) < n_runs:
        results_list = ant_colony.run_multi_ants(n_runs - len(ant_colony.solutions))
        ant_colony.solutions.extend(results_list)
        # All remaining ants deposit pheromone, elitism is not applied here
        for each_result in results_list:
            ant_colony.deposit_pheromone(each_result.pheromone_multiplier, each_result.graph)


def _aggregate_round_results(ant_colony, results_list, elitism, evaporate):
    """ Store the results of one optimization round and update the pheromone matrix

    :param all_classes.AntSolver ant_colony: optimizing object
    :param list results_list: one group of results per hive, as returned by ant_colony.run_multi_ants
    :param int elitism: only this many lowest-cost results of each group deposit pheromone (-1: all of them)
    :param bool evaporate: evaporate pheromone before depositing
    """

    if evaporate:
        ant_colony.evaporate_pheromone()

    # Aggregate results, deposit pheromone
    ant_colony.solutions.extend([each_result for each_group in results_list for each_result in each_group])
    for each_group in results_list:
        for n, each_result in enumerate(sorted(each_group, key=lambda x: x.cost)):
            if n < elitism or elitism == -1:
                ant_colony.deposit_pheromone(each_result.pheromone_multiplier, each_result.graph)
def read_options(argument_parser, unpack_section='', user_config_file=None, default_internal_file=None, verbosity=0):
    """ Process configuration files and command line arguments. Resolution order is arguments > user_config_file >
    default_config_file.

    :param argparse.ArgumentParser argument_parser: command line arguments to be processed
    :param str unpack_section: unpack all variables from this section from user_config_file (if present) and
                               default_config_file
    :param str user_config_file: read this configuration file, takes precedence over default_config_file
    :param str default_internal_file: read internal paths and vars from this file, this will not be superseded by
                                      user input
    :param verbosity: set the verbosity level
    :raises SystemExit: if a required file cannot be read or if both quiet and verbose were requested
    :rtype: all_classes.Namedlist
    """

    os_util.local_print('Entering read_options(argument_parser={}, unpack_section={}, user_config_file={}, '
                        'default_internal_file={}, verbosity={})'
                        ''.format(argument_parser, unpack_section, user_config_file, default_internal_file,
                                  verbosity),
                        msg_verbosity=os_util.verbosity_level.debug, current_verbosity=verbosity)

    # Internal data is read from default_internal_file or, if it was not supplied, from the file shipped in config/
    internal_file = default_internal_file or os.path.join(os.path.dirname(__file__), 'config', 'internal.ini')
    internals = configparser.ConfigParser()
    if not internals.read(internal_file):
        os_util.local_print('Failed to read internal data file {}. Cannot continue. Check your install, this should '
                            'not happen'.format(internal_file),
                            current_verbosity=verbosity, msg_verbosity=os_util.verbosity_level.error)
        raise SystemExit(-1)

    # Reads command line parameters
    arguments = argument_parser.parse_args()

    # Reads defaults from default_config_file or from default location
    default_config_file = os.path.join(os.path.dirname(__file__), internals['default']['default_config'])

    # An explicit user_config_file argument takes precedence over the one given in the command line
    if arguments.config_file is not None and user_config_file is None:
        user_config_file = arguments.config_file

    result_data = configparser.ConfigParser()
    try:
        read_files = result_data.read(default_config_file)
    except IOError:
        read_files = []
    # ConfigParser.read returns the list of files it successfully parsed, so an empty list is a failure as well
    if not read_files:
        os_util.local_print('Failed to read the configuration file {}. I cannot continue without it.'
                            ''.format(default_config_file),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise SystemExit(1)

    result_data = {key: dict(result_data.items(key)) for key in result_data.sections()}

    if user_config_file:
        user_file = configparser.ConfigParser()
        if not user_file.read(user_config_file):
            os_util.local_print('Failed to read the configuration file {}. I cannot continue without it.'
                                ''.format(user_config_file),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)
        result_data = os_util.recursive_update(result_data,
                                               {key: dict(user_file.items(key)) for key in user_file.sections()})

    if unpack_section:
        # Copy all info in section unpack_section to top level
        result_data = os_util.recursive_update(dict(result_data['globals']), dict(result_data[unpack_section]))

    command_line_data = vars(arguments)

    # Overwrites values in result_data (ie: read from config files) with those from command line
    result_data.update({k: v for k, v in command_line_data.items() if v is not None})

    # If values were not provided in config files, load them from argparse defaults (None for most cases)
    result_data.update({k: v for k, v in command_line_data.items() if k not in result_data})

    # Detect all types in result_data
    result_data = all_classes.Namespace(os_util.recursive_map(os_util.detect_type, dict(result_data)))

    # Programmatically set some global variables
    if result_data.verbose is None:
        result_data.verbose = 0

    if result_data.quiet:
        if result_data.verbose > 0:
            os_util.local_print('I cannot be quiet and verbose at once. Please, select only one of them.',
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)
        result_data.verbose = -1

    if result_data.verbose <= 2:
        # Silence rdkit error log unless a high verbosity level was requested
        from rdkit.rdBase import DisableLog
        DisableLog('rdApp.error')

    if result_data.threads == 0 or result_data.threads is None:
        # Detect the number of threads. os.sched_getaffinity is not available in all platforms, fall back to
        # os.cpu_count in this case.
        try:
            from os import sched_getaffinity
        except ImportError:
            from os import cpu_count
            result_data.threads = cpu_count()
        else:
            result_data.threads = len(sched_getaffinity(0))

    # threads must be an int and either -1 or a positive number (this also catches cpu_count() returning None)
    if type(result_data.threads) is not int or result_data.threads < -1 or result_data.threads == 0:
        os_util.local_print('Invalid number of threads supplied or detected. Falling back to threads = 1',
                            msg_verbosity=os_util.verbosity_level.warning, current_verbosity=verbosity)
        result_data.threads = 1

    if result_data.no_checks is None:
        result_data.no_checks = False

    if result_data.progress_file is None:
        result_data.progress_file = 'progress.pkl'

    # Internal data is added last, so it cannot be overwritten by config files or command line arguments
    result_data['internal'] = all_classes.Namespace({key: dict(internals.items(key))
                                                     for key in internals.sections()})

    return result_data
def fill_thermograph(thermograph, molecules, pairlist=None, use_hs=False, threads=1, custom_mcs=None, savestate=None,
                     verbosity=0):
    """ Add to thermograph one edge for each pair of molecules and set the edge data perturbed_atoms, desirability
    and cost. The number of perturbed atoms is calculated from the maximum common substructure (MCS) of the pair,
    which is read from savestate, taken from custom_mcs or calculated using find_mcs.

    :param networkx.Graph thermograph: map to be edited
    :param dict molecules: molecules will be read from this dict, format {'molname': rdkit.Chem.Mol}
    :param list pairlist: create edges for these pairs (default: create edges for all possible pairs in molecules)
    :param bool use_hs: consider Hs in the perturbation costs (default: False)
    :param int threads: run this many threads (default = 1); -1 runs find_mcs serially, without multiprocessing
    :param dict custom_mcs: custom mcs and atom maps to be used, keys are frozenset of molecule names or '*' (used
                            for every pair without a specific entry)
    :param savestate_util.SavableState savestate: saved state data
    :param int verbosity: set verbosity level
    :raises SystemExit: if a perturbation would change no atoms
    """

    # Perturbations will connect larger molecules to smaller ones, by default.
    if not pairlist:
        pairlist = [(mol_i, mol_j)
                    if molecules[mol_i].GetNumHeavyAtoms() >= molecules[mol_j].GetNumHeavyAtoms()
                    else (mol_j, mol_i)
                    for mol_i, mol_j in itertools.combinations(molecules, 2)]

    if custom_mcs is None:
        custom_mcs = {}

    smiles_cache = {}

    def smiles_key(mol_i, mol_j):
        # MCS results are indexed by the frozenset of the SMILES of both molecules. Each SMILES is generated once.
        for each_name in (mol_i, mol_j):
            if each_name not in smiles_cache:
                smiles_cache[each_name] = rdkit.Chem.MolToSmiles(molecules[each_name])
        return frozenset([smiles_cache[mol_i], smiles_cache[mol_j]])

    if savestate:
        savestate.setdefault('mcs_dict', {})
        candidate_pairs = [pair for pair in pairlist
                           if smiles_key(pair[0], pair[1]) not in savestate['mcs_dict']]
    else:
        candidate_pairs = pairlist

    # Pairs covered by custom_mcs do not need an MCS search. A new list is built, so that pairlist is never
    # modified (it is used below to create the edges).
    todo_pairs = [pair for pair in candidate_pairs
                  if not ('*' in custom_mcs or frozenset(pair) in custom_mcs)]

    if todo_pairs:
        mcs_args = [[[molecules[mol_i], molecules[mol_j]], None, verbosity] for (mol_i, mol_j) in todo_pairs]
        mcs_kwargs = {'completeRingsOnly': True, 'matchValences': True, 'ringMatchesRingOnly': True}
        if threads == -1:
            mcs_data = [os_util.wrapper_fn(find_mcs, each_args, mcs_kwargs) for each_args in mcs_args]
        else:
            with multiprocessing.Pool(threads) as thread_pool:
                # Results are collected before the pool is closed
                mcs_data = list(os_util.starmap_unpack(find_mcs, thread_pool, mcs_args,
                                                       itertools.repeat(mcs_kwargs)))
    else:
        mcs_data = []

    new_results = {smiles_key(mol_i, mol_j): each_result
                   for each_result, (mol_i, mol_j) in zip(mcs_data, todo_pairs)}

    if savestate:
        for each_key, each_result in new_results.items():
            savestate['mcs_dict'][each_key] = each_result
        savestate.save_data()
        # Work on a copy, so that custom MCS do not replace the calculated ones stored in savestate
        search_dict = dict(savestate['mcs_dict'])
    else:
        search_dict = new_results

    # Custom MCS take precedence over calculated or saved ones. They must be stored using the same SMILES-based key
    # used in the lookup below. NOTE(review): custom_mcs values are read via .smartsString below, exactly as
    # find_mcs results; presumably they are of the same type - confirm against callers.
    for each_pair in pairlist:
        if frozenset(each_pair) in custom_mcs:
            search_dict[smiles_key(each_pair[0], each_pair[1])] = custom_mcs[frozenset(each_pair)]
        elif '*' in custom_mcs:
            search_dict[smiles_key(each_pair[0], each_pair[1])] = custom_mcs['*']

    for each_mol_i, each_mol_j in pairlist:
        core_mol = rdkit.Chem.MolFromSmarts(search_dict[smiles_key(each_mol_i, each_mol_j)].smartsString)
        if use_hs:
            num_core_atoms = core_mol.GetNumAtoms()
            atoms_i = molecules[each_mol_i].GetNumAtoms()
            atoms_j = molecules[each_mol_j].GetNumAtoms()
        else:
            num_core_atoms = core_mol.GetNumHeavyAtoms()
            atoms_i = molecules[each_mol_i].GetNumHeavyAtoms()
            atoms_j = molecules[each_mol_j].GetNumHeavyAtoms()

        # The edge cost is the number of perturbed atoms in a hypothetical transformation between the pair.
        perturbed_atoms = (atoms_i - num_core_atoms) + (atoms_j - num_core_atoms)

        if perturbed_atoms == 0:
            os_util.local_print('The perturbation between {} and {} would change no heavy atoms. Currently, this is '
                                'not supported. Should you need to simulate this perturbation, pass perturbation_map '
                                'directly to prepare_dual_topology.py'
                                ''.format(molecules[each_mol_i].GetProp('_Name'),
                                          molecules[each_mol_j].GetProp('_Name')),
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)

        thermograph.add_edge(each_mol_i, each_mol_j, perturbed_atoms=perturbed_atoms, desirability=1.0)

    all_pert_atoms = [i for _, _, i in thermograph.edges(data='perturbed_atoms')]
    if not all_pert_atoms:
        # No edges in the graph, there are no costs to be set (and the median would be undefined)
        return

    # Scale the number of perturbed atoms using beta = ln(0.2) / median(all_pert_atoms), so that
    # exp(beta * perturbed_atoms) is 0.2 for the median edge and the costs lie in the [0, 1) interval
    # TODO: configurable beta expression
    beta = -1.6094379 / median(all_pert_atoms)
    for (edge_i, edge_j) in thermograph.edges:
        thermograph[edge_i][edge_j]['cost'] = 1 - exp(beta * thermograph[edge_i][edge_j]['perturbed_atoms'])
def align_sequences_match_residues(mobile_seq, target_seq, seq_align_mat='blosum80', gap_penalty=-1.0, verbosity=0):
    """ Align two aminoacid sequences using Bio.pairwise2.globalds and substitution matrix seq_align_mat, return a
    tuple with two lists of residues to be used in the 3D alignment (mobile, reference)

    Each returned list has one bool per residue of the respective sequence, which is True if the residue is aligned
    to a residue (and not to a gap) of the other sequence.

    :param str mobile_seq: sequence of mobile protein
    :param str target_seq: sequence of target protein
    :param str seq_align_mat: use this substitution matrix from Bio.SubsMat.MatrixInfo
    :param float gap_penalty: gap penalty to the alignment (used for both gap opening and gap extension); avoid
                              values too low in module
    :param int verbosity: sets the verbosity level
    :raises ImportError: if Biopython modules cannot be imported
    :raises KeyError: if seq_align_mat is not a name in Bio.SubsMat.MatrixInfo
    :rtype: tuple
    """

    try:
        from Bio.pairwise2 import align
        substitution_matrix = import_module('Bio.SubsMat.MatrixInfo').__dict__[seq_align_mat]
    except ImportError as error:
        os_util.local_print('Failed to import Biopython with error: {}\nBiopython is necessary to sequence '
                            'alignment. Sequences to be aligned:\nReference: {}\nMobile: {}'
                            ''.format(error, target_seq, mobile_seq),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        # Re-raise the original exception, so that its message and traceback are preserved
        raise
    except KeyError as error:
        # Matrix name not found, try to list the valid ones in the error message
        try:
            from Bio.SubsMat.MatrixInfo import available_matrices
        except ImportError:
            os_util.local_print("Failed to import Biopython. The sequences of your protein structures mismatch, so I "
                                "need Biopython to align them. See documentation.",
                                msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
            raise SystemExit(1)
        os_util.local_print('Failed to import substitution matrix {} with error: {}\nSubstitution matrix must be one '
                            'from Bio.SubsMat.MatrixInfo (in this installation: {})'
                            ''.format(seq_align_mat, error, available_matrices),
                            msg_verbosity=os_util.verbosity_level.error, current_verbosity=verbosity)
        raise

    # Only the first alignment returned is used. The same penalty is used to open and to extend gaps.
    align_result = align.globalds(target_seq, mobile_seq, substitution_matrix, gap_penalty, gap_penalty)[0]
    os_util.local_print('This is the alignment result to be used in protein alignment:\n{}'
                        ''.format(align_result),
                        msg_verbosity=os_util.verbosity_level.info, current_verbosity=verbosity)

    # The first two elements of the result are the aligned target and mobile sequences, gaps are marked with '-'
    aligned_target, aligned_mobile = align_result[0], align_result[1]

    # One entry per residue of the target (reference): is it aligned to a residue of the mobile sequence?
    ref_align_str = [res_mobile != '-'
                     for res_target, res_mobile in zip(aligned_target, aligned_mobile) if res_target != '-']
    # One entry per residue of the mobile sequence: is it aligned to a residue of the target?
    mob_align_str = [res_target != '-'
                     for res_target, res_mobile in zip(aligned_target, aligned_mobile) if res_mobile != '-']

    return mob_align_str, ref_align_str