class Angle(ObjectiveProvider):

    """
    Objective that scores the angle (or dihedral) defined by a set of atoms.

    Parameters
    ----------
    threshold : float or 'planar'
        Optimum angle, in degrees. The literal string ``planar`` selects a
        sine-based score instead of a plain deviation.
    probes : list of str
        Atoms that make the angle, expressed as a series of
        <molecule_name>/<serial_number> strings

    Returns
    -------
    float
        Absolute deviation from the threshold angle, in degrees. With
        ``threshold='planar'`` the absolute value of the sine of the
        measured angle is returned instead.
    """

    _validate = {
        parse.Required('probes'): parse.AssertList(
            parse.Named_spec("molecule", "atom")),
        parse.Required('threshold'): parse.Any(
            parse.Coerce(float), parse.In(['planar'])),
    }

    def __init__(self, threshold=None, probes=None, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._probes = probes
        self.threshold = threshold

    def probes(self, ind):
        """
        Yield every atom matched by the configured (molecule, serial) pairs
        in individual ``ind``.
        """
        for molecule_name, serial_number in self._probes:
            molecule = ind.find_molecule(molecule_name)
            for atom in molecule.find_atoms(serial_number):
                yield atom

    def evaluate(self, ind):
        """
        Measure the angle spanned by the probe atoms and score it against
        the threshold.
        """
        coordinates = [atom.xformCoord() for atom in self.probes(ind)]
        try:
            measured = chimera.angle(*coordinates)
        except TypeError:
            # chimera.angle rejected the arguments: with four atoms this
            # is a dihedral
            measured = chimera.dihedral(*coordinates)

        if self.threshold != 'planar':
            return abs(self.threshold - measured.real)
        return abs(math.sin(math.radians(measured)))
class NormalModes(GeneProvider):

    """
    NormalModes class

    Parameters
    ----------
    method : str
        Either:
        - prody : calculate normal modes using prody algorithms
        - gaussian : read normal modes from a gaussian output file
    target : str
        Name of the Gene containing the actual molecule
    modes : list, optional, default=range(12)
        Modes to be used to move the molecule
    group_by : str or callable, optional, default=None
        group_by_*: algorithm name or callable
        coarseGrain(prm) which makes ``mol.select().setBetas(i)``,
        where ``i`` is the index Coarse Grain group,
        and ``prm`` is ``prody.AtomGroup``
    group_lambda : int, optional
        Either: number of residues per group (default=7), or
        total mass per group (default=100)
    path : str
        Gaussian or prody modes output path. Required if ``method`` is
        ``gaussian``. With ``method='prody'``, providing a path makes the
        gene read the modes from that NMD file instead of computing them.
    write_modes: bool, optional
        write a ``molecule_modes.nmd`` file with the ProDy modes
    n_samples : int, optional, default=10000
        number of conformations to generate
    rmsd : float, optional, default=1.0
        average RMSD that the conformations will have with respect
        to the initial conformation

    Attributes
    ----------
    allele : slice of prody.ensemble
        Randomly picked coordinates from NORMAL_MODES_SAMPLES
    NORMAL_MODES : prody.modes
        normal modes calculated for the molecule or read
        from the gaussian frequencies output file stored
        in a prody modes class (ANM or RTB)
    NORMAL_MODES_SAMPLES : prody.ensemble
        configurations applying modes to molecule
    _original_coords : numpy.array
        Parent coordinates
    _chimera2prody : dict
        _chimera2prody[chimera_index] = prody_index
    """

    _validate = {
        parse.Required('method'): parse.In(['prody', 'gaussian']),
        'path': parse.RelPathToInputFile(),
        'write_modes': parse.Boolean,
        parse.Required('target'): parse.Molecule_name,
        'group_by': parse.In(['residues', 'mass', 'calpha', '']),
        'group_lambda': parse.All(parse.Coerce(int), parse.Range(min=1)),
        'modes': [parse.All(parse.Coerce(int), parse.Range(min=0))],
        'n_samples': parse.All(parse.Coerce(int), parse.Range(min=1)),
        'rmsd': parse.All(parse.Coerce(float), parse.Range(min=0))
    }

    def __init__(self, method='prody', target=None, modes=None, n_samples=10000, rmsd=1.0,
                 group_by=None, group_lambda=None, path=None, write_modes=False, **kwargs):
        # Fire up!
        GeneProvider.__init__(self, **kwargs)
        self.method = method
        self.target = target
        self.modes = modes if modes is not None else range(12)
        # Highest requested mode index + 1: number of modes to compute
        self.max_modes = max(self.modes) + 1
        self.n_samples = n_samples
        self.rmsd = rmsd
        self.group_by = None
        self.group_by_options = None
        self.path = None
        self.write_modes = write_modes
        if method == 'prody':
            if path is None:
                # No file given: compute the modes with ProDy
                self.normal_modes_function = self.calculate_prody_normal_modes
                self.group_by = group_by
                self.group_by_options = {} if group_lambda is None else {'n': group_lambda}
            else:
                # File given: read previously computed ProDy modes
                self.path = path
                self.normal_modes_function = self.read_prody_normal_modes
        else:  # gaussian
            self.normal_modes_function = self.read_gaussian_normal_modes
            if path is None:
                raise ValueError('Path is required if method == gaussian')
            self.path = path

        # One cache per gene name, created on first use
        if self.name not in self._cache:
            self._cache[self.name] = LRU(300)

    def __ready__(self):
        """
        Second stage of initialization

        It saves the parent coordinates, calculates the normal modes and
        initializes the allele. Modes and samples are computed only when
        the cache for this gene name does not hold them yet.
        """
        cached = self._CACHE.get('normal_modes')
        if not cached:
            (normal_modes, normal_modes_samples,
             chimera2prody, prody_molecule) = self.normal_modes_function()
            self._CACHE['normal_modes'] = normal_modes
            self._CACHE['normal_modes_samples'] = normal_modes_samples
            self._CACHE['chimera2prody'] = chimera2prody
            self._CACHE['original_coords'] = chimeracoords2numpy(self.molecule)
            if self.write_modes:
                title = os.path.join(self.parent.cfg.output.path,
                                     '{}_modes.nmd'.format(self.molecule.name))
                prody.writeNMD(title, normal_modes, prody_molecule)
        self.allele = random.choice(self.NORMAL_MODES_SAMPLES)

    def express(self):
        """
        Apply new coords as provided by current normal mode
        """
        c2p = self._chimera2prody
        for atom in self.molecule.atoms:
            index = c2p[atom.serialNumber]
            new_coords = self.allele[index]
            atom.setCoord(chimera.Point(*new_coords))

    def unexpress(self):
        """
        Undo coordinates change
        """
        for i, atom in enumerate(self.molecule.atoms):
            atom.setCoord(chimera.Point(*self._original_coords[i]))

    def mate(self, mate):
        """
        .. todo::

            Combine coords between two samples in NORMAL_MODES_SAMPLES?
            Or two samples between different NORMAL_MODES_SAMPLES?
            Or combine samples between two NORMAL_MODES_SAMPLES?

            For now : pass
        """
        pass

    def mutate(self, indpb):
        """
        Replace the allele with another random sample, with probability
        ``self.indpb``.

        The picked sample is stored in ``self.allele`` so that the next
        ``express`` call applies it. It is also returned, as previous
        versions did; ``None`` is returned when no mutation happens.
        """
        # NOTE(review): the ``indpb`` argument is not used; the probability
        # is read from ``self.indpb`` -- confirm this is the intended source.
        if random.random() < self.indpb:
            self.allele = random.choice(self.NORMAL_MODES_SAMPLES)
            return self.allele

    #####
    @property
    def molecule(self):
        return self.parent.genes[self.target].compound.mol

    @property
    def _CACHE(self):
        return self._cache[self.name]

    @property
    def NORMAL_MODES(self):
        return self._CACHE.get('normal_modes')

    @property
    def NORMAL_MODES_SAMPLES(self):
        return self._CACHE.get('normal_modes_samples')

    @property
    def _chimera2prody(self):
        return self._CACHE.get('chimera2prody')

    @property
    def _original_coords(self):
        return self._CACHE.get('original_coords')

    def _sample_conformations(self, modes, prody_molecule):
        """
        Generate ``n_samples`` conformations along the selected ``modes``
        and append the coordinates of ``prody_molecule`` itself.

        Returns a list with the coordinates of each conformation.
        """
        samples = prody.sampleModes(modes=modes[self.modes], atoms=prody_molecule,
                                    n_confs=self.n_samples, rmsd=self.rmsd)
        samples.addCoordset(prody_molecule)
        return [sample.getCoords() for sample in samples]

    def calculate_prody_normal_modes(self):
        """
        calculate normal modes, creates a dictionary between chimera and
        prody indices and calculate n_confs number of configurations using
        these modes
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(self.molecule)
        modes = prody_modes(prody_molecule, self.max_modes, GROUPERS[self.group_by],
                            **self.group_by_options)
        samples_coords = self._sample_conformations(modes, prody_molecule)
        return modes, samples_coords, chimera2prody, prody_molecule

    def read_prody_normal_modes(self):
        """
        read normal modes from the NMD file at ``self.path``, creates a
        dictionary between chimera and prody indices and calculate n_confs
        number of configurations using these modes
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(self.molecule)
        modes = prody.parseNMD(self.path)[0]
        samples_coords = self._sample_conformations(modes, prody_molecule)
        return modes, samples_coords, chimera2prody, prody_molecule

    def read_gaussian_normal_modes(self):
        """
        read normal modes, creates a dictionary between chimera and
        prody indices and calculate n_confs number of configurations using
        these modes
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(self.molecule)
        modes = gaussian_modes(self.path)
        samples_coords = self._sample_conformations(modes, prody_molecule)
        return modes, samples_coords, chimera2prody, prody_molecule
class Contacts(ObjectiveProvider):

    """
    Contacts class

    Parameters
    ----------
    probes : list of str
        Names of the molecule genes that are object of contacts analysis
    radius : float
        Maximum distance from any point of probes that is searched
        for possible interactions
    which : {'hydrophobic', 'clashes'}
        Type of interactions to measure
    clash_threshold : float, optional
        Maximum overlap of van-der-Waals spheres that is considered as
        a contact (attractive). If the overlap is greater, it's
        considered a clash (repulsive)
    hydrophobic_threshold : float, optional
        Maximum overlap for hydrophobic patches.
    hydrophobic_elements : list of str, optional, defaults to [C, S]
        Which elements are allowed to interact in hydrophobic patches
    cutoff : float, optional
        If the overlap volume is greater than this, a penalty is applied.
        Useful to filter bad solutions.
    bond_separation : int, optional
        Ignore clashes or contacts between atoms within n bonds.
    same_residue : bool, optional
        Forwarded to DetectClash as its ``intraRes`` option, defaults to True
    only_internal : bool, optional
        If set to True, take into account only intramolecular interactions,
        defaults to False

    Returns
    -------
    float
        Lennard-Jones-like energy when `which`=`hydrophobic`,
        and volumetric overlap of VdW spheres in A³ if `which`=`clashes`.
    """

    _validate = {
        parse.Required('probes'): [parse.Molecule_name],
        'radius': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'which': parse.In(['hydrophobic', 'clashes']),
        'clash_threshold': parse.Coerce(float),
        'hydrophobic_threshold': parse.Coerce(float),
        'cutoff': parse.Coerce(float),
        'hydrophobic_elements': [basestring],
        'bond_separation': parse.All(parse.Coerce(int), parse.Range(min=2)),
        'same_residue': parse.Coerce(bool),
        'only_internal': parse.Coerce(bool),
    }

    def __init__(self, probes=None, radius=5.0, which='hydrophobic',
                 clash_threshold=0.6, hydrophobic_threshold=-0.4, cutoff=0.0,
                 hydrophobic_elements=('C', 'S'), bond_separation=4,
                 same_residue=True, only_internal=False, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._probes = probes
        self.radius = radius
        self.which = which
        self.clash_threshold = clash_threshold
        self.hydrophobic_threshold = hydrophobic_threshold
        self.cutoff = cutoff
        self.hydrophobic_elements = set(hydrophobic_elements)
        self.bond_separation = bond_separation
        self.same_residue = same_residue
        self.only_internal = only_internal

        # The scoring mode fixes both the evaluation routine and the
        # overlap threshold handed to DetectClash.
        if which != 'hydrophobic':
            self.evaluate = self.evaluate_clashes
            self.threshold = clash_threshold
        else:
            self.evaluate = self.evaluate_hydrophobic
            self.threshold = hydrophobic_threshold

    def molecules(self, ind):
        """ All the Chimera molecules held by individual ``ind`` """
        return [gene.compound.mol for gene in ind._molecules.values()]

    def probes(self, ind):
        """ Chimera molecules of the genes listed as probes """
        return [ind.find_molecule(name).compound.mol for name in self._probes]

    def evaluate_clashes(self, ind):
        """
        Sum of the absolute volumetric overlaps of all clashes. If a
        non-zero ``cutoff`` is exceeded, ``-1000 * weight`` is returned
        instead.
        """
        _, clashing = self.find_interactions(ind)
        total = sum(abs(volume) for (_a1, _a2, _overlap, volume) in clashing)
        if self.cutoff and total > self.cutoff:
            total = -1000 * self.weight
        return total

    def evaluate_hydrophobic(self, ind):
        """ Sum of the Lennard-Jones-like scores of all hydrophobic contacts """
        contacts, _ = self.find_interactions(ind)
        return sum(energy for (_a1, _a2, _overlap, energy) in contacts)

    def find_interactions(self, ind):
        """
        Run DetectClash on the atoms around the probes and classify the
        results with ``_analyze_interactions``.
        """
        search_atoms = self._surrounding_atoms(ind)
        detected = DetectClash.detectClash(
            search_atoms,
            test=search_atoms,
            intraRes=self.same_residue,
            interSubmodel=True,
            clashThreshold=self.threshold,
            assumedMaxVdw=2.1,
            hbondAllowance=0.2,
            bondSeparation=self.bond_separation)
        return self._analyze_interactions(detected)

    def _analyze_interactions(self, clashes):
        """
        Interpret contacts provided by DetectClash.

        Parameters
        ----------
        clashes : dict of dict
            Output of DetectClash. It's a dict of atoms, whose values are dicts.
            These subdictionaries contain all the contacting atoms as keys, and
            the respective overlapping length as values.

        Returns
        -------
        positive : list of list
            Each sublist depicts an interaction, with four items: the two
            involved atoms, their overlap, and their Lennard-Jones score.
        negative : list of list
            Each sublist depicts an interaction, with four items: the two
            involved atoms, their overlap, and their volumetric overlap.

        .. note ::

            Every pair whose overlap does not exceed ``clash_threshold`` is a
            candidate contact; it is kept only if both elements belong to
            ``hydrophobic_elements``, and is scored with ``_lennard_jones``.
            Every other pair is a clash, measured with the volumetric overlap
            of the involved atoms' Van der Waals spheres.
        """
        attractive, repulsive = [], []
        allowed = self.hydrophobic_elements
        for first, partners in clashes.items():
            for second, overlap in partners.items():
                if overlap <= self.clash_threshold:
                    # Below the clash threshold: only hydrophobic pairs count
                    both_hydrophobic = (first.element.name in allowed
                                        and second.element.name in allowed)
                    if both_hydrophobic:
                        score = self._lennard_jones(first, second, overlap)
                        attractive.append([first, second, overlap, score])
                else:
                    # Above the clash threshold: clash!
                    volume = self._vdw_vol_overlap(first, second, overlap)
                    repulsive.append([first, second, overlap, volume])
        return attractive, repulsive

    def _surrounding_atoms(self, ind):
        """
        Get atoms in the search zone, based on the molecule, (possible)
        rotamer genes and the radius
        """
        zone = self.zone
        zone.clear()
        # Start from every atom of the probe molecules
        zone.add([atom for mol in self.probes(ind) for atom in mol.atoms])

        if not self.only_internal:
            # Beta carbons of rotamers, so their surroundings are searched too
            rotamer_names = [name for name, gene in ind.genes.items()
                             if gene.__class__.__name__ == 'Rotamers']
            beta_carbons = []
            for name in rotamer_names:
                for ((_molname, _pos), residue) in ind.genes[name].residues.items():
                    for atom in residue.atoms:
                        if atom.name == 'CB':
                            beta_carbons.append(atom)
            zone.add(beta_carbons)
            # Surrounding zone from probes + rotamers atoms
            surroundings = chimera.specifier.zone(
                zone, 'atom', None, self.radius, self.molecules(ind))
            zone.merge(chimera.selection.REPLACE, surroundings)
        return zone.atoms()

    @staticmethod
    def _lennard_jones(a1, a2, overlap=None):
        """
        VERY rough approximation of a Lennard-Jones score (12-6).

        Parameters
        ----------
        a1, a2 : chimera.Atom
        overlap : float
            Overlapping radii of involved atoms, as provided
            by DetectClash. If None, the distance is measured from the
            atoms' transformed coordinates.

        Notes
        -----
        The usual implementation of a LJ potential is:

            LJ = 4*epsilon*(0.25*((r0/r)**12) - 0.5*((r0/r)**6))

        Two approximations are done:
            - The atoms involved are considered equal, hence the
              distance at which the energy is minimum (r0) is just
              the sum of their radii.
            - Epsilon is always 1.
        """
        r0 = a1.radius + a2.radius
        distance = (a1.xformCoord().distance(a2.xformCoord())
                    if overlap is None else r0 - overlap)
        sixth_power = (r0 / distance)**6
        return (sixth_power * sixth_power - 2 * sixth_power)

    @staticmethod
    def _vdw_vol_overlap(a1, a2, overlap=None):
        """
        Volumetric overlap of Van der Waals spheres of atoms.

        Parameters
        ----------
        a1, a2 : chimera.Atom
        overlap : float
            Overlapping sphere segment of involved atoms. If None, the
            distance is measured from the atoms' transformed coordinates.

        .. note ::
            Adapted from Eran Eyal, Comput Chem 25: 712-724, 2004
        """
        PI = 3.14159265359
        if overlap is None:
            d = a1.xformCoord().distance(a2.xformCoord())
        else:
            d = a1.radius + a2.radius - overlap

        # Coincident centers: fixed score, which also avoids dividing by zero
        if d == 0:
            return 1000

        # Heights of the two spherical caps; zero when spheres do not touch
        h_a = h_b = 0
        if d < (a1.radius + a2.radius):
            h_a = (a2.radius**2 - (d - a1.radius)**2) / (2 * d)
            h_b = (a1.radius**2 - (d - a2.radius)**2) / (2 * d)

        cap_a = (h_a**2) * (3 * a1.radius - h_a)
        cap_b = (h_b**2) * (3 * a2.radius - h_b)
        return (PI / 3) * (cap_a + cap_b)
class Solvation(ObjectiveProvider):

    """
    Solvation class

    Parameters
    ----------
    targets : [str]
        Names of the molecule genes being analyzed
    threshold : float, optional, default=0
        Optimize the difference to this value
    radius : float, optional, default=5.0
        Max distance to search for neighbor atoms from targets.
    method : str, optional, default=area
        Which method should be used. Both methods compute the surface
        of the solvated molecule. `area` returns the surface area of
        such surface, while `volume` returns the volume occupied by
        the model.

    Returns
    -------
    float
        Absolute difference between ``threshold`` and the surface area of
        the solvated shell, in A² (if method=area), or the volume of the
        solvated shell, in A³ (if method=volume).
    """

    _validate = {
        parse.Required('targets'): [parse.Molecule_name],
        'threshold': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'radius': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'method': parse.In(['volume', 'area']),
    }

    def __init__(self, targets=None, threshold=0.0, radius=5.0, method='area',
                 *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._targets = targets
        self.threshold = threshold
        self.radius = radius
        self.method = method
        # Anything other than 'area' is evaluated as a volume
        self.evaluate = (self.evaluate_area if method == 'area'
                         else self.evaluate_volume)

    def targets(self, ind):
        """ Chimera molecules of the genes listed as targets """
        found = []
        for name in self._targets:
            found.append(ind.find_molecule(name).compound.mol)
        return found

    def molecules(self, ind):
        """ Tuple with all the Chimera molecules held by individual ``ind`` """
        return tuple(gene.compound.mol for gene in ind._molecules.values())

    def surface(self, ind):
        """ Surface computed from the atoms around the targets """
        zone = self.zone_atoms(self.targets(ind), self.molecules(ind))
        return grid_sas_surface(zone)

    def evaluate_area(self, ind):
        """ Absolute difference between surface area and threshold """
        area = surface_area(*self.surface(ind))
        return abs(area - self.threshold)

    def evaluate_volume(self, ind):
        """ Absolute difference between enclosed volume and threshold """
        volume = enclosed_volume(*self.surface(ind))[0]
        return abs(volume - self.threshold)

    def zone_atoms(self, probes, molecules):
        """
        Atoms of ``probes`` plus, when ``radius`` is non-zero, whatever the
        zone specifier selects within ``radius`` among ``molecules``.
        """
        zone = self.zone
        zone.clear()
        zone.add([atom for probe in probes for atom in probe.atoms])
        if self.radius:
            neighbours = chimera.specifier.zone(
                zone, 'atom', None, self.radius, molecules)
            zone.merge(chimera.selection.REPLACE, neighbours)
        return zone.atoms()
class Energy(ObjectiveProvider):

    """
    Calculate the energy of a system

    Parameters
    ----------
    targets : list of str, default=None
        If set, which molecules should be evaluated. Else, all will be evaluated.
    forcefields : list of str, default=('amber99sbildn.xml',)
        Which forcefields to use
    auto_parametrize: list of str, default=None
        List of Molecule instances GAUDI should try to auto parametrize with antechamber.
    parameters : list of 2-item list of str
        List of (gaff.mol2, .frcmod) files to use as parametrization source.
    platform : str
        Which platform to use for calculations. Choose between CPU, CUDA, OpenCL.

    Returns
    -------
    float
        The estimated potential energy, in kJ/mol
    """

    _validate = {
        'targets': [parse.Molecule_name],
        'forcefields': [parse.Any(parse.ExpandUserPathExists,
                                  parse.In(_openmm_builtin_forcefields))],
        'auto_parametrize': [parse.Molecule_name],
        'parameters': [parse.All([parse.ExpandUserPathExists],
                                 parse.Length(min=2, max=2))],
        'platform': parse.In(['CUDA', 'OpenCL', 'CPU'])
    }

    def __init__(self, targets=None, forcefields=('amber99sbildn.xml',),
                 auto_parametrize=None, parameters=None, platform=None,
                 *args, **kwargs):
        # Enforce a minimum of 6 for the ``precision`` option when given
        if kwargs.get('precision', 6) < 6:
            kwargs['precision'] = 6
        ObjectiveProvider.__init__(self, **kwargs)
        self.auto_parametrize = auto_parametrize
        self._targets = targets
        self._parameters = parameters
        self.platform = platform
        self.topology = None
        self._simulation = None

        additional_ffxml = []
        if parameters:
            # [(mol2, frcmod), ...] -> (mol2s, frcmods)
            additional_ffxml.append(create_ffxml_file(*zip(*parameters)))
        if auto_parametrize:
            filenames = [g.path for m in auto_parametrize
                         for g in self.environment.cfg.genes
                         if g.name == m]
            additional_ffxml.append(self._gaff2xml(*filenames))

        self._forcefields = tuple(forcefields) + tuple(additional_ffxml)
        self.forcefield = openmm_app.ForceField(*self._forcefields)

    def evaluate(self, individual):
        """
        Calculates the energy of current individual

        Notes
        -----
        For static calculations, where molecules are essentially always the same,
        but with different coordinates, we only need to generate topologies once.
        However, for dynamic jobs, with potentially different molecules involved
        each time, we cannot guarantee having the same topology. As a result,
        we generate it again for each evaluation.
        """
        molecules = self.molecules(individual)
        coordinates = self.chimera_molecule_to_openmm_positions(*molecules)

        # Build topology if it's first time or a dynamic job
        if self.topology is None or not self._gaudi_is_static(individual):
            self.topology = self.chimera_molecule_to_openmm_topology(*molecules)
            self._simulation = None  # This forces a Simulation rebuild

        return self.calculate_energy(coordinates)

    def molecules(self, individual):
        """
        Chimera molecules to evaluate: the configured targets, or every
        molecule of the individual if no targets were set.
        """
        if self._targets is None:
            return [m.compound.mol for m in individual._molecules.values()]
        else:
            return [individual.find_molecule(t).compound.mol for t in self._targets]

    @property
    def simulation(self):
        """
        Build a new OpenMM simulation if not yet defined and return it

        Notes
        -----
        self.topology must be defined previously!
        Use self.chimera_molecule_to_openmm_topology to set it.
        """
        if self._simulation is None:
            system = self.forcefield.createSystem(
                self.topology,
                nonbondedMethod=openmm_app.CutoffNonPeriodic,
                nonbondedCutoff=1.0 * unit.nanometers,
                rigidWater=True, constraints=None)
            integrator = openmm.VerletIntegrator(0.001)
            # The platform is an optional trailing positional argument of
            # Simulation, so it is passed as a 0- or 1-item tuple.
            if self.platform is not None:
                platform = (openmm.Platform.getPlatformByName(self.platform),)
            else:
                platform = ()
            self._simulation = openmm_app.Simulation(self.topology, system,
                                                     integrator, *platform)
        return self._simulation

    def calculate_energy(self, coordinates):
        """
        Set up an OpenMM simulation with default parameters
        and return the potential energy of the initial state

        Parameters
        ----------
        coordinates : simtk.unit.Quantity
            Positions of the atoms in the system

        Returns
        -------
        potential_energy : float
            Potential energy of the system, in kJ/mol
        """
        self.simulation.context.setPositions(coordinates)
        # Retrieve initial energy
        state = self.simulation.context.getState(getEnergy=True)
        # Use the public unit-aware accessor rather than the private
        # ``_value`` attribute, so the result is explicitly in kJ/mol.
        return state.getPotentialEnergy().value_in_unit(unit.kilojoules_per_mole)

    @staticmethod
    def chimera_molecule_to_openmm_topology(*molecules):
        """
        Convert Chimera Molecule objects to a single OpenMM topology.

        Parameters
        ----------
        molecules : chimera.Molecule
            One or more molecules. Chains are keyed by the position of the
            molecule in the arguments and the chain ID, so equal chain IDs
            in different molecules produce different chains.

        Returns
        -------
        topology : simtk.openmm.app.topology.Topology
        """
        # Create topology
        atoms, residues, chains = {}, {}, {}
        topology = openmm_app.Topology()
        for i, mol in enumerate(molecules):
            for a in mol.atoms:
                chain_id = (i, a.residue.id.chainId)
                try:
                    chain = chains[chain_id]
                except KeyError:
                    chain = chains[chain_id] = topology.addChain()

                r = a.residue
                try:
                    residue = residues[r]
                except KeyError:
                    residue = residues[r] = topology.addResidue(r.type, chain)
                name = a.name
                element = openmm_app.Element.getByAtomicNumber(a.element.number)
                serial = a.serialNumber
                atoms[a] = topology.addAtom(name, element, residue, serial)

            # Bonds are added once all atoms of this molecule are registered
            for b in mol.bonds:
                topology.addBond(atoms[b.atoms[0]], atoms[b.atoms[1]])

        return topology

    @staticmethod
    def chimera_molecule_to_openmm_positions(*molecules):
        """
        Concatenate the positions of all atoms in ``molecules`` into a single
        ``unit.Quantity`` expressed in angstroms.
        """
        # Get positions
        positions = [atom_positions(m.atoms, m.openState.xform) for m in molecules]
        all_positions = numpy.concatenate(positions)
        return unit.Quantity(all_positions, unit=unit.angstrom)

    @staticmethod
    def _gaff2xml(*filenames, **kwargs):
        """
        Use OpenMolTools wrapper to run antechamber programmatically
        and auto parametrize requested molecules.

        Parameters
        ----------
        filenames: list of str
            List of the filenames of the molecules to parametrize.
            Extra keyword arguments are forwarded to ``run_antechamber``.

        Returns
        -------
        ffxmls : StringIO
            Compiled ffxml file produced by antechamber and openmoltools converter
        """
        frcmods, gaffmol2s = [], []
        for filename in filenames:
            # Filename without its last extension
            name = '.'.join(filename.split('.')[:-1])
            gaffmol2, frcmod = run_antechamber(name, filename, **kwargs)
            frcmods.append(frcmod)
            gaffmol2s.append(gaffmol2)
        return create_ffxml_file(gaffmol2s, frcmods)

    def _gaudi_is_static(self, individual):
        """
        Check if this essay is performing topology changes.

        Genes that can change topologies:
            - genes of class ``Mutamers`` with mutations ON
            - genes of class ``Molecule`` with more than one catalog entry
              (block building enabled)

        Parameters
        ----------
        individual : gaudi.base.Individual
            The individual to be analyzed for dynamic behaviour

        Returns
        -------
        bool
            False if any gene may change the topology, True otherwise
        """
        for gene in individual.genes.values():
            if gene.__class__.__name__ == 'Mutamers':
                if gene.mutations:
                    return False
            if gene.__class__.__name__ == 'Molecule':
                if len(gene.catalog) > 1:
                    return False

        return True
class Gold(ObjectiveProvider):

    """
    Gold class

    Parameters
    ----------
    protein : str
        The name of molecule acting as protein
    ligand : str
        The name of molecule acting as ligand
    scoring : str, optional, defaults to chemscore
        Fitness function to use. Choose between chemscore, chemplp,
        goldscore and asp.
    score_component : str, optional, defaults to 'Score'
        Scoring fields to parse out of the rescore.log file, such as
        Score, DG, S(metal), etc.
    radius : float, optional, defaults to 10.0
        Radius (in A) of binding site sphere, the origin of which is
        automatically centered at the ligand's center of mass.

    Returns
    -------
    float
        Interaction energy as reported by GOLD's chosen scoring function
    """

    _validate = {
        parse.Required('protein'): parse.Molecule_name,
        parse.Required('ligand'): parse.Molecule_name,
        'scoring': parse.In(['chemscore', 'chemplp', 'goldscore', 'asp']),
        'radius': parse.Coerce(float),
        'score_component': str,
    }

    def __init__(self, protein='Protein', ligand='Ligand', scoring='chemscore',
                 score_component='Score', radius=10, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.protein_names = [protein]
        self.ligand_names = [ligand]
        self.scoring = scoring
        self.score_component = score_component
        self.radius = radius
        self.executable = find_executable('gold_auto')
        if self.executable is None:
            sys.exit('GOLD could not be found in $PATH. Is it (correctly) installed?')
        # Remembered so evaluate() can restore it after running GOLD
        self._oldworkingdir = os.getcwd()
        self._paths = {}
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Get a molecule gene instance of individual by its name
        """
        for name in names:
            yield ind.find_molecule(name)

    def evaluate(self, ind):
        """
        Run a subprocess calling the GOLD executable with the generated
        configuration file, and parse the results out of ``rescore.log``.
        Clean tmp files at exit.

        Returns ``-100000 * weight`` if ``rescore.log`` cannot be read or
        does not contain a score.
        """
        self.tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        proteins = list(self.get_molecule_by_name(ind, *self.protein_names))
        ligands = list(self.get_molecule_by_name(ind, *self.ligand_names))

        protein_path = self.prepare_proteins(proteins)
        ligand_path = self.prepare_ligands(ligands)
        origin = self.origin(ligands[0])
        command = self.prepare_command(protein_path, ligand_path, origin)

        penalty = -100000 * self.weight
        try:
            os.chdir(self.tmpdir)
            # subprocess.call does not raise on a non-zero exit status;
            # a failed run is detected through the missing/unusable log.
            subprocess.call(command)
            score = self.parse_output('rescore.log')
        except (subprocess.CalledProcessError, IOError):
            logger.warning("Could not run GOLD with command %s", command)
            return penalty
        finally:
            # Restore the working directory first, so it happens even if
            # the cleanup below runs into trouble.
            os.chdir(self._oldworkingdir)
            self.clean()

        if score is None:
            # No 'Ok' row in the log: never hand None over as a fitness
            logger.warning("Could not find a GOLD score for command %s", command)
            return penalty
        return score

    def prepare_proteins(self, proteins):
        """
        Write all ``proteins`` to a single temporary PDB file and return
        its path. Note that the last item of ``proteins`` is popped.
        """
        proteinpath = '{}_proteins.pdb'.format(self.tmpfile)
        last_protein = proteins.pop()
        last_protein.write(absolute=proteinpath, combined_with=proteins, filetype='pdb')
        self._paths['proteins'] = proteinpath
        return proteinpath

    def prepare_ligands(self, ligands):
        """
        Write all ``ligands`` to a single temporary mol2 file and return
        its path.
        """
        ligandpath = '{}_ligands.mol2'.format(self.tmpfile)
        ligand_mols = [lig.compound.mol for lig in ligands]

        writeMol2(ligand_mols, ligandpath, temporary=True, multimodelHandling='combined')
        self._paths['ligands'] = ligandpath
        return ligandpath

    def origin(self, molecule):
        """
        Mass-weighted average of the atom positions of ``molecule``.
        """
        molecule = molecule.compound.mol
        coordinates = atom_positions(molecule.atoms, molecule.openState.xform)
        masses = np.fromiter((a.element.mass for a in molecule.atoms),
                             dtype='float32', count=molecule.numAtoms)
        return np.average(coordinates, axis=0, weights=masses)

    def prepare_command(self, protein_path, ligand_path, origin):
        """
        Fill in the configuration template, write it to a temporary
        ``.conf`` file and return the command (as a list) that runs it.
        """
        replaces = dict(PROTEIN=protein_path, LIGAND=ligand_path,
                        ORIGIN='{} {} {}'.format(*origin),
                        SCORING=self.scoring, RADIUS=self.radius)
        inputfile = _TEMPLATE.safe_substitute(replaces)
        inputfilepath = self.tmpfile + '.conf'
        with open(inputfilepath, 'w') as f:
            f.write(inputfile)
        self._paths['conf'] = inputfilepath
        return [self.executable, inputfilepath]

    def parse_output(self, filename):
        """
        Parse the requested score component out of a GOLD log file.

        The line whose first field is ``Status`` is used as a header to
        locate the column named ``score_component`` (column 4 is assumed
        if no header precedes the data). The value in that column of the
        first line whose first field is ``Ok`` is returned as a float.

        Returns None if the file contains no ``Ok`` line.
        """
        fitness = 4
        with open(filename) as f:
            for line in f:
                if not line.strip():
                    continue
                fields = line.split()
                if fields[0] == 'Status':
                    fitness = fields.index(self.score_component)
                elif fields[0] == 'Ok':
                    return float(fields[fitness])

    def clean(self):
        """
        Remove the temporary files registered in ``_paths``. Files that
        cannot be removed are logged and skipped, so a single failure
        does not leave the remaining files (or the registry) behind.
        """
        for p in self._paths.values():
            try:
                os.remove(p)
            except OSError as e:
                logger.debug("Could not remove temporary file %s: %s", p, e)
        self._paths.clear()
class Distance(ObjectiveProvider):

    """
    Distance class

    Parameters
    ----------
    threshold : float or 'covalent'
        Optimum distance to meet. If ``covalent``, the bond length that
        Chimera reports for the elements of each probe and the target
        atom is used; this requires ``target`` to be an atom.
    tolerance : float
        Maximum deviation from threshold that is not penalized
    target : str
        The atom to measure the distance to, expressed as
        <molecule name>/<atom serial>, or a set of coordinates
    probes : list of str
        The atoms whose distance to `target` is being measured,
        expressed as <molecule name>/<atom serial>. If more than one
        is provided, the average of all of them is returned
    center_of_mass : bool
        If True, measure the distance from the target to the center of
        mass of the probes instead.

    Returns
    -------
    float
        (Mean of) absolute deviation from threshold distance, in A.
    """

    _validate = {
        parse.Required('probes'): parse.AssertList(parse.Named_spec("molecule", "atom")),
        parse.Required('target'): parse.Any(parse.Named_spec("molecule", "atom"),
                                            parse.Coordinates),
        parse.Required('threshold'): parse.Any(parse.Coerce(float), parse.In(['covalent'])),
        'tolerance': parse.Coerce(float),
        # A flag, so it is coerced to bool like the other boolean options
        'center_of_mass': parse.Coerce(bool)
    }

    def __init__(self, threshold=None, tolerance=None, target=None, probes=None,
                 center_of_mass=False, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.threshold = threshold
        self.tolerance = tolerance
        self.center_of_mass = center_of_mass
        self._probes = probes
        self._target = target
        if self.center_of_mass:
            self.evaluate = self.evaluate_center_of_mass
        else:
            self.evaluate = self.evaluate_distances

    def atoms(self, ind, *targets):
        """
        Yield every atom of ``ind`` matched by the given
        (molecule, serial) pairs.
        """
        for target in targets:
            mol, serial = target
            for atom in ind.find_molecule(mol).find_atoms(serial):
                yield atom

    def _resolve_target(self, ind):
        """
        Return ``(atom, point)`` for the configured target. ``atom`` is
        None when the target was given as plain coordinates.
        """
        if isinstance(self._target[0], basestring):  # AtomSpec like 'Molecule/1'
            atom = ind.find_molecule(self._target.molecule).find_atom(self._target.atom)
            return atom, atom.xformCoord()
        # coordinates
        return None, chimera.Point(*self._target)

    def evaluate_distances(self, ind):
        """
        Measure the distance of each probe to the target and return the
        mean of the absolute deviations from the threshold.

        Raises
        ------
        ValueError
            If ``threshold`` is ``covalent`` but the target was given as
            coordinates, since no element is available to look up a bond
            length.
        """
        target_atom, target = self._resolve_target(ind)
        covalent = self.threshold == 'covalent'
        if covalent and target_atom is None:
            raise ValueError("threshold='covalent' requires target to be an atom, "
                             "not coordinates")

        distances = []
        for a in self.atoms(ind, *self._probes):
            d = self._distance(a, target)
            if covalent:
                # The element belongs to the atom, not to its coordinates
                threshold = chimera.Element.bondLength(a.element, target_atom.element)
            else:
                threshold = self.threshold
            d = d - threshold
            # NOTE(review): the parameter docs describe tolerance as the
            # maximum *unpenalized* deviation, yet the penalty is applied
            # when the deviation is below it -- confirm intended direction.
            if self.tolerance is not None and d < self.tolerance:
                distances.append(-1000 * self.weight)
            else:
                distances.append(d)

        return numpy.mean(numpy.absolute(distances))

    def evaluate_center_of_mass(self, ind):
        """
        Distance between the target and the center of mass of the probes.
        """
        _, target = self._resolve_target(ind)
        probes = list(self.atoms(ind, *self._probes))
        center_of_mass = self._center(*probes)

        return target.distance(chimera.Point(*center_of_mass))

    @staticmethod
    def _distance(atom, target):
        """ Distance from the transformed coordinates of ``atom`` to point ``target`` """
        return atom.xformCoord().distance(target)

    @staticmethod
    def _center(*atoms):
        """ Mass-weighted average of the transformed coordinates of ``atoms`` """
        coords, masses = [], []
        for a in atoms:
            coords.append(a.xformCoord())
            masses.append(a.element.mass)

        return numpy.average(coords, axis=0, weights=masses)
class LigScore(ObjectiveProvider):

    """
    LigScore class

    Parameters
    ----------
    proteins : list of str
        The name of molecules that are acting as proteins
    ligands : list of str
        The name of molecules that are acting as ligands
    method : {'rank', 'pose'}, optional, defaults to pose
        Passed to the executable as a ``--<method>`` flag
    binary : str, optional
        Path to ligand_score executable. If not provided, it is
        searched for with ``find_executable``.
    library : str, optional
        Path to LigScore lib file

    Returns
    -------
    float
        Interaction energy as reported by IMP's ligand_score.
    """

    _validate = {
        parse.Required('proteins'): [parse.Molecule_name],
        parse.Required('ligands'): [parse.Molecule_name],
        'method': parse.In(['rank', 'pose']),
        'binary': parse.ExpandUserPathExists,
        'library': parse.ExpandUserPathExists,
    }

    def __init__(self, proteins=('Protein',), ligands=('Ligand',), method='pose',
                 binary=None, library=None, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.protein_names = proteins
        self.ligand_names = ligands
        self.binary = find_executable('ligand_score') if binary is None else binary
        self.library = library
        self.method = method

        # Remembered so evaluate() can restore it after running the binary
        self._oldworkingdir = os.getcwd()
        self._paths = {}
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Get a molecule gene instance of individual by its name
        """
        for name in names:
            yield ind.find_molecule(name)

    def evaluate(self, ind):
        """
        Run a subprocess calling LigScore binary with provided options,
        and parse the results. Clean tmp files at exit.

        Returns ``-100000 * weight`` if the binary exits with an error or
        its output cannot be parsed into a score.
        """
        self.tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        proteins = list(self.get_molecule_by_name(ind, *self.protein_names))
        ligands = list(self.get_molecule_by_name(ind, *self.ligand_names))

        protein_path = self.prepare_proteins(proteins)
        ligand_path = self.prepare_ligands(ligands)
        command = self.prepare_command(protein_path, ligand_path)

        penalty = -100000 * self.weight
        try:
            os.chdir(self.tmpdir)
            stream = subprocess.check_output(command, universal_newlines=True)
        except subprocess.CalledProcessError:
            logger.warning("Could not run LigScore with command %s", command)
            return penalty
        finally:
            # Restore the working directory first, so it happens even if
            # the cleanup below runs into trouble.
            os.chdir(self._oldworkingdir)
            self.clean()

        try:
            return self.parse_output(stream)
        except (IndexError, ValueError):
            # Empty output, or a last word that is not a number
            logger.warning("Could not parse LigScore output for command %s", command)
            return penalty

    def prepare_proteins(self, proteins):
        """
        Write all ``proteins`` to a single temporary PDB file and return
        its path. Note that the last item of ``proteins`` is popped.
        """
        proteinpath = '{}_proteins.pdb'.format(self.tmpfile)
        last_protein = proteins.pop()
        last_protein.write(absolute=proteinpath, combined_with=proteins, filetype='pdb')
        self._paths['proteins'] = proteinpath
        return proteinpath

    def prepare_ligands(self, ligands):
        """
        Write all ``ligands`` to a single temporary mol2 file and return
        its path.
        """
        ligandpath = '{}_ligands.mol2'.format(self.tmpfile)
        ligand_mols = [lig.compound.mol for lig in ligands]

        writeMol2(ligand_mols, ligandpath, temporary=True, multimodelHandling='combined')
        self._paths['ligands'] = ligandpath
        return ligandpath

    def prepare_command(self, protein_path, ligand_path):
        """
        Build the command line, as a list of strings: binary, method flag,
        ligand file, protein file and, if set, the library file.
        """
        cmd = [self.binary, '--' + self.method, ligand_path, protein_path]
        if self.library:
            cmd.append(self.library)
        # A real list (not a lazy map), so it can be logged and reused
        return [str(part) for part in cmd]

    def parse_output(self, stream):
        """ Get last word of first line (and unique) and parse it into float """
        return float(stream.splitlines()[0].split()[-1])

    def clean(self):
        """
        Remove the temporary files registered in ``_paths``. Files that
        cannot be removed are logged and skipped, so a single failure
        does not leave the remaining files (or the registry) behind.
        """
        for p in self._paths.values():
            try:
                os.remove(p)
            except OSError as e:
                logger.debug("Could not remove temporary file %s: %s", p, e)
        self._paths.clear()