Ejemplo n.º 1
0
class Angle(ObjectiveProvider):
    """
    Objective that scores how far an angle (or dihedral) is from a target.

    Parameters
    ----------
    threshold : float or 'planar'
        Target angle, in degrees. The literal string ``planar`` switches
        the score to the absolute sine of the measured angle.
    probes : list of str
        Atoms defining the angle, each one given as a
        <molecule_name>/<serial_number> string.

    Returns
    -------
    float
        Absolute difference between ``threshold`` and the measured angle,
        or ``abs(sin(angle))`` when ``threshold`` is ``planar``.
    """

    _validate = {
        parse.Required('threshold'): parse.Any(parse.Coerce(float),
                                               parse.In(['planar'])),
        parse.Required('probes'): parse.AssertList(
            parse.Named_spec("molecule", "atom")),
    }

    def __init__(self, threshold=None, probes=None, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._probes = probes
        self.threshold = threshold

    def probes(self, ind):
        """
        Yield, in order, every atom of ``ind`` matched by the configured
        (molecule, serial) probe pairs.
        """
        for molecule_name, serial_number in self._probes:
            molecule = ind.find_molecule(molecule_name)
            for found in molecule.find_atoms(serial_number):
                yield found

    def evaluate(self, ind):
        """
        Measure the angle spanned by the probe atoms and score it against
        ``self.threshold``.
        """
        coordinates = []
        for probe_atom in self.probes(ind):
            coordinates.append(probe_atom.xformCoord())

        try:
            measured = chimera.angle(*coordinates)
        except TypeError:
            # chimera.angle rejected the arguments: treat the probes as
            # four atoms defining a dihedral instead.
            measured = chimera.dihedral(*coordinates)

        if self.threshold != 'planar':
            return abs(self.threshold - measured.real)
        return abs(math.sin(math.radians(measured)))
Ejemplo n.º 2
0
class NormalModes(GeneProvider):
    """
    Gene that displaces a molecule along pre-sampled normal-mode
    conformations.

    Parameters
    ----------
    method : str
        Either:
        - prody : calculate normal modes using prody algorithms (or read
          them from a ProDy ``.nmd`` file if ``path`` is given)
        - gaussian : read normal modes from a gaussian output file
    target : str
        Name of the Gene containing the actual molecule
    modes : list, optional, default=range(12)
        Indices of the modes used to move the molecule
    group_by : str or callable, optional, default=None
        group_by_*: algorithm name or callable
        coarseGrain(prm) which makes ``mol.select().setBetas(i)``,
        where ``i`` is the index Coarse Grain group,
        and ``prm`` is ``prody.AtomGroup``.
        Only used when modes are calculated (``method='prody'`` without
        ``path``).
    group_lambda : int, optional
        Forwarded to the grouping algorithm as its ``n`` option. Either:
        number of residues per group (default=7), or total mass per
        group (default=100)
    path : str
        Gaussian or prody modes output path. Required if ``method`` is
        ``gaussian``.
    write_modes : bool, optional
        Write a ``<molecule>_modes.nmd`` file with the ProDy modes
    n_samples : int, optional, default=10000
        Number of conformations to generate
    rmsd : float, optional, default=1.0
        Average RMSD that the conformations will have with respect
        to the initial conformation

    Attributes
    ----------
    allele : slice of prody.ensemble
        Randomly picked coordinates from NORMAL_MODES_SAMPLES
    NORMAL_MODES : prody.modes
        Normal modes calculated for the molecule, or read from the
        gaussian frequencies output file, stored in a prody modes
        class (ANM or RTB)
    NORMAL_MODES_SAMPLES : list of numpy.array
        Coordinate sets obtained by applying the modes to the molecule
    _original_coords : numpy.array
        Parent coordinates
    _chimera2prody : dict
        _chimera2prody[chimera_index] = prody_index

    Raises
    ------
    ValueError
        If ``modes`` is empty, or ``method`` is ``gaussian`` and no
        ``path`` was supplied.
    """

    _validate = {
        parse.Required('method'): parse.In(['prody', 'gaussian']),
        'path': parse.RelPathToInputFile(),
        'write_modes': parse.Boolean,
        parse.Required('target'): parse.Molecule_name,
        'group_by': parse.In(['residues', 'mass', 'calpha', '']),
        'group_lambda': parse.All(parse.Coerce(int), parse.Range(min=1)),
        'modes': [parse.All(parse.Coerce(int), parse.Range(min=0))],
        'n_samples': parse.All(parse.Coerce(int), parse.Range(min=1)),
        'rmsd': parse.All(parse.Coerce(float), parse.Range(min=0))
    }

    def __init__(self,
                 method='prody',
                 target=None,
                 modes=None,
                 n_samples=10000,
                 rmsd=1.0,
                 group_by=None,
                 group_lambda=None,
                 path=None,
                 write_modes=False,
                 **kwargs):
        # Fire up!
        GeneProvider.__init__(self, **kwargs)
        self.method = method
        self.target = target
        # Materialize the default as a real list: a lazy ``range`` object
        # (Python 3) is not a list/tuple and cannot be relied on as an
        # index for the modes container.
        self.modes = modes if modes is not None else list(range(12))
        if len(self.modes) == 0:
            raise ValueError('At least one mode index is required in modes')
        # Highest requested index + 1 == how many modes must be computed
        self.max_modes = max(self.modes) + 1
        self.n_samples = n_samples
        self.rmsd = rmsd
        self.group_by = None
        self.group_by_options = None
        self.path = None
        self.write_modes = write_modes
        if method == 'prody':
            if path is None:
                # No file given: compute the modes ourselves
                self.normal_modes_function = self.calculate_prody_normal_modes
                self.group_by = group_by
                self.group_by_options = {} if group_lambda is None else {
                    'n': group_lambda
                }
            else:
                self.path = path
                self.normal_modes_function = self.read_prody_normal_modes
        else:  # gaussian
            self.normal_modes_function = self.read_gaussian_normal_modes
            if path is None:
                raise ValueError('Path is required if method == gaussian')
            self.path = path

        # One cache slot per gene name, shared through self._cache
        if self.name not in self._cache:
            self._cache[self.name] = LRU(300)

    def __ready__(self):
        """
        Second stage of initialization

        It saves the parent coordinates, calculates the normal modes
        (unless they are already cached for this gene name) and
        initializes the allele with a random sample.
        """
        cache = self._CACHE
        # Explicit None test: do not depend on the truth value of a
        # ProDy modes object.
        if cache.get('normal_modes') is None:
            (normal_modes, normal_modes_samples, chimera2prody,
             prody_molecule) = self.normal_modes_function()
            cache['normal_modes'] = normal_modes
            cache['normal_modes_samples'] = normal_modes_samples
            cache['chimera2prody'] = chimera2prody
            cache['original_coords'] = chimeracoords2numpy(self.molecule)
            if self.write_modes:
                title = os.path.join(self.parent.cfg.output.path,
                                     '{}_modes.nmd'.format(self.molecule.name))
                prody.writeNMD(title, normal_modes, prody_molecule)
        self.allele = random.choice(self.NORMAL_MODES_SAMPLES)

    def express(self):
        """
        Apply new coords as provided by current normal mode sample
        """
        c2p = self._chimera2prody
        for atom in self.molecule.atoms:
            index = c2p[atom.serialNumber]
            new_coords = self.allele[index]
            atom.setCoord(chimera.Point(*new_coords))

    def unexpress(self):
        """
        Undo coordinates change, restoring the cached parent coordinates
        """
        original_coords = self._original_coords
        for i, atom in enumerate(self.molecule.atoms):
            atom.setCoord(chimera.Point(*original_coords[i]))

    def mate(self, mate):
        """
        .. todo::

            Combine coords between two samples in NORMAL_MODES_SAMPLES?
            Or two samples between different NORMAL_MODES_SAMPLES?
            Or combine samples between two NORMAL_MODES_SAMPLES?

            For now : pass
        """
        pass

    def mutate(self, indpb):
        """
        Switch to another random sample with probability ``self.indpb``.

        The picked sample is stored in ``self.allele`` so the mutation
        takes effect even when the caller ignores the return value; it
        is also returned (``None`` if no mutation happened) for callers
        that relied on the previous return value.

        NOTE(review): the ``indpb`` argument is unused; the probability
        is read from ``self.indpb`` as before -- confirm which one is
        intended.
        """
        if random.random() < self.indpb:
            self.allele = random.choice(self.NORMAL_MODES_SAMPLES)
            return self.allele

    #####
    @property
    def molecule(self):
        return self.parent.genes[self.target].compound.mol

    @property
    def _CACHE(self):
        return self._cache[self.name]

    @property
    def NORMAL_MODES(self):
        return self._CACHE.get('normal_modes')

    @property
    def NORMAL_MODES_SAMPLES(self):
        return self._CACHE.get('normal_modes_samples')

    @property
    def _chimera2prody(self):
        return self._CACHE.get('chimera2prody')

    @property
    def _original_coords(self):
        return self._CACHE.get('original_coords')

    def _sample_modes(self, modes, prody_molecule):
        """
        Generate ``n_samples`` conformations along the selected ``modes``
        and return their coordinates (plus the starting coordset of
        ``prody_molecule``, appended last) as a list.
        """
        samples = prody.sampleModes(modes=modes[self.modes],
                                    atoms=prody_molecule,
                                    n_confs=self.n_samples,
                                    rmsd=self.rmsd)
        samples.addCoordset(prody_molecule)
        return [sample.getCoords() for sample in samples]

    def calculate_prody_normal_modes(self):
        """
        Calculate normal modes, create a dictionary between chimera and
        prody indices and calculate n_confs configurations using these
        modes.

        Returns
        -------
        modes, samples_coords, chimera2prody, prody_molecule
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(
            self.molecule)
        modes = prody_modes(prody_molecule, self.max_modes,
                            GROUPERS[self.group_by], **self.group_by_options)
        samples_coords = self._sample_modes(modes, prody_molecule)
        return modes, samples_coords, chimera2prody, prody_molecule

    def read_prody_normal_modes(self):
        """
        Read normal modes from the ProDy NMD file at ``self.path``,
        create a dictionary between chimera and prody indices and
        calculate n_confs configurations using these modes.

        Returns
        -------
        modes, samples_coords, chimera2prody, prody_molecule
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(
            self.molecule)
        modes = prody.parseNMD(self.path)[0]
        samples_coords = self._sample_modes(modes, prody_molecule)
        return modes, samples_coords, chimera2prody, prody_molecule

    def read_gaussian_normal_modes(self):
        """
        Read normal modes from the Gaussian output at ``self.path``,
        create a dictionary between chimera and prody indices and
        calculate n_confs configurations using these modes.

        Returns
        -------
        modes, samples_coords, chimera2prody, prody_molecule
        """
        prody_molecule, chimera2prody = convert_chimera_molecule_to_prody(
            self.molecule)
        modes = gaussian_modes(self.path)
        samples_coords = self._sample_modes(modes, prody_molecule)
        return modes, samples_coords, chimera2prody, prody_molecule
Ejemplo n.º 3
0
class Contacts(ObjectiveProvider):
    """
    Objective that scores contacts and clashes around probe molecules.

    Parameters
    ----------
    probes : list of str
        Names of the molecule genes that are object of contacts analysis
    radius : float
        Maximum distance from any point of probes that is searched
        for possible interactions
    which : {'hydrophobic', 'clashes'}
        Type of interactions to measure
    clash_threshold : float, optional
        Maximum overlap of van-der-Waals spheres that is considered as
        a contact (attractive). If the overlap is greater, it's
        considered a clash (repulsive)
    hydrophobic_threshold : float, optional
        Maximum overlap for hydrophobic patches.
    hydrophobic_elements : list of str, optional, defaults to [C, S]
        Which elements are allowed to interact in hydrophobic patches
    cutoff : float, optional
        If the overlap volume is greater than this, a penalty is applied.
        Useful to filter bad solutions.
    bond_separation : int, optional
        Ignore clashes or contacts between atoms within n bonds.
    same_residue : bool, optional
        Forwarded to DetectClash as its ``intraRes`` option,
        defaults to True
    only_internal : bool, optional
        If set to True, take into account only intramolecular
        interactions, defaults to False

    Returns
    -------
    float
        Lennard-Jones-like energy when `which`=`hydrophobic`,
        and volumetric overlap of VdW spheres in A^3 if `which`=`clashes`.
    """
    _validate = {
        parse.Required('probes'): [parse.Molecule_name],
        'which': parse.In(['hydrophobic', 'clashes']),
        'radius': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'cutoff': parse.Coerce(float),
        'clash_threshold': parse.Coerce(float),
        'hydrophobic_threshold': parse.Coerce(float),
        'hydrophobic_elements': [basestring],
        'bond_separation': parse.All(parse.Coerce(int), parse.Range(min=2)),
        'only_internal': parse.Coerce(bool),
        'same_residue': parse.Coerce(bool),
    }

    def __init__(self,
                 probes=None,
                 radius=5.0,
                 which='hydrophobic',
                 clash_threshold=0.6,
                 hydrophobic_threshold=-0.4,
                 cutoff=0.0,
                 hydrophobic_elements=('C', 'S'),
                 bond_separation=4,
                 same_residue=True,
                 only_internal=False,
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._probes = probes
        self.which = which
        self.radius = radius
        self.cutoff = cutoff
        self.clash_threshold = clash_threshold
        self.hydrophobic_threshold = hydrophobic_threshold
        self.hydrophobic_elements = set(hydrophobic_elements)
        self.bond_separation = bond_separation
        self.same_residue = same_residue
        self.only_internal = only_internal
        # Bind the scoring routine and the DetectClash threshold that
        # correspond to the requested interaction type.
        wants_hydrophobic = which == 'hydrophobic'
        self.evaluate = (self.evaluate_hydrophobic
                         if wants_hydrophobic else self.evaluate_clashes)
        self.threshold = (hydrophobic_threshold
                          if wants_hydrophobic else clash_threshold)

    def molecules(self, ind):
        """Chimera molecules of every molecule gene in ``ind``."""
        found = []
        for gene in ind._molecules.values():
            found.append(gene.compound.mol)
        return found

    def probes(self, ind):
        """Chimera molecules of the configured probe genes."""
        found = []
        for probe_name in self._probes:
            found.append(ind.find_molecule(probe_name).compound.mol)
        return found

    def evaluate_clashes(self, ind):
        """
        Sum the absolute volumetric overlap of all repulsive interactions.
        If a non-zero ``cutoff`` is exceeded, the score is replaced by
        ``-1000 * self.weight``.
        """
        _, repulsive = self.find_interactions(ind)
        total_overlap = sum(
            abs(volume) for (_a1, _a2, _overlap, volume) in repulsive)
        if self.cutoff and total_overlap > self.cutoff:
            return -1000 * self.weight
        return total_overlap

    def evaluate_hydrophobic(self, ind):
        """Sum the Lennard-Jones-like score of all attractive interactions."""
        attractive, _ = self.find_interactions(ind)
        return sum(energy for (_a1, _a2, _overlap, energy) in attractive)

    def find_interactions(self, ind):
        """
        Run DetectClash on the atoms of the search zone and classify the
        result into (positive, negative) interactions.
        """
        zone_atoms = self._surrounding_atoms(ind)
        detect_options = {
            'test': zone_atoms,
            'intraRes': self.same_residue,
            'interSubmodel': True,
            'clashThreshold': self.threshold,
            'assumedMaxVdw': 2.1,
            'hbondAllowance': 0.2,
            'bondSeparation': self.bond_separation,
        }
        detected = DetectClash.detectClash(zone_atoms, **detect_options)
        return self._analyze_interactions(detected)

    def _analyze_interactions(self, clashes):
        """
        Interpret contacts provided by DetectClash.

        Parameters
        ----------
        clashes : dict of dict
            Output of DetectClash. It's a dict of atoms, whose values are
            dicts. These subdictionaries contain all the contacting atoms
            as keys, and the respective overlapping length as values.

        Returns
        -------
        positive : list of list
            Each sublist depicts an interaction, with four items: the two
            involved atoms, their overlap, and their Lennard-Jones score.
        negative : list of list
            Each sublist depicts an interaction, with four items: the two
            involved atoms, their overlap, and their volumetric overlap.

        .. note ::
            Pairs whose overlap does not exceed ``clash_threshold`` are
            kept as attractive only if both atoms belong to
            ``hydrophobic_elements``; they are scored with
            ``_lennard_jones``. Every other pair is a clash, measured
            with the volumetric overlap of the atoms' Van der Waals
            spheres (``_vdw_vol_overlap``).
        """
        attractive = []
        repulsive = []
        allowed_elements = self.hydrophobic_elements
        for first, partners in clashes.items():
            for second, overlap in partners.items():
                # Negated on purpose so that every value that is not
                # ``<= clash_threshold`` is treated as a clash.
                if not overlap <= self.clash_threshold:
                    volume = self._vdw_vol_overlap(first, second, overlap)
                    repulsive.append([first, second, overlap, volume])
                    continue
                both_hydrophobic = (
                    first.element.name in allowed_elements
                    and second.element.name in allowed_elements)
                if both_hydrophobic:
                    energy = self._lennard_jones(first, second, overlap)
                    attractive.append([first, second, overlap, energy])
        return attractive, repulsive

    def _surrounding_atoms(self, ind):
        """
        Get atoms in the search zone, based on the probe molecules,
        (possible) rotamer genes and the radius
        """
        self.zone.clear()
        # Seed the zone with every atom of the probe molecules
        probe_atoms = []
        for molecule in self.probes(ind):
            for atom in molecule.atoms:
                probe_atoms.append(atom)
        self.zone.add(probe_atoms)

        if self.only_internal:
            return self.zone.atoms()

        # Beta carbons of rotamers are added so clashes/contacts in their
        # surroundings are found too
        beta_carbons = []
        for gene in ind.genes.values():
            if gene.__class__.__name__ != 'Rotamers':
                continue
            for (_molname, _pos), residue in gene.residues.items():
                for atom in residue.atoms:
                    if atom.name == 'CB':
                        beta_carbons.append(atom)
        self.zone.add(beta_carbons)

        # Replace the selection with everything within `radius` of the
        # probes+rotamers atoms
        neighborhood = chimera.specifier.zone(self.zone, 'atom', None,
                                              self.radius,
                                              self.molecules(ind))
        self.zone.merge(chimera.selection.REPLACE, neighborhood)
        return self.zone.atoms()

    @staticmethod
    def _lennard_jones(a1, a2, overlap=None):
        """
        VERY rough approximation of a Lennard-Jones score (12-6).

        Parameters
        ----------
        a1, a2 : chimera.Atom
        overlap : float
            Overlapping radii of involved atoms, as provided
            by DetectClash. If omitted, the distance is measured from
            the atoms' transformed coordinates.

        Notes
        -----
        The usual implementation of a LJ potential is:
            LJ = 4*epsilon*(0.25*((r0/r)**12) - 0.5*((r0/r)**6))
        Two approximations are done:
            - The atoms involved are considered equal, hence the
              distance at which the energy is minimum (r0) is just
              the sum of their radii.
            - Epsilon is always 1.
        """
        r0 = a1.radius + a2.radius
        distance = (a1.xformCoord().distance(a2.xformCoord())
                    if overlap is None else r0 - overlap)
        sixth_power = (r0 / distance)**6
        return sixth_power * sixth_power - 2 * sixth_power

    @staticmethod
    def _vdw_vol_overlap(a1, a2, overlap=None):
        """
        Volumetric overlap of Van der Waals spheres of atoms.

        Parameters
        ----------
        a1, a2 : chimera.Atom
        overlap : float
            Overlapping sphere segment of involved atoms. If omitted, the
            distance is measured from the atoms' transformed coordinates.

        .. note ::
            Adapted from Eran Eyal, Comput Chem 25: 712-724, 2004
        """
        PI = 3.14159265359
        if overlap is not None:
            d = a1.radius + a2.radius - overlap
        else:
            d = a1.xformCoord().distance(a2.xformCoord())
        if d == 0:
            # Coincident centers: fixed large value
            return 1000
        if d < (a1.radius + a2.radius):
            # Heights of the two spherical caps of the intersection
            h_a = (a2.radius**2 - (d - a1.radius)**2) / (2 * d)
            h_b = (a1.radius**2 - (d - a2.radius)**2) / (2 * d)
        else:
            h_a, h_b = 0, 0

        return (PI / 3) * ((h_a**2) * (3 * a1.radius - h_a) + (h_b**2) *
                           (3 * a2.radius - h_b))
Ejemplo n.º 4
0
class Solvation(ObjectiveProvider):
    """
    Objective based on the solvent-accessible surface around targets.

    Parameters
    ----------
    targets : [str]
        Names of the molecule genes being analyzed
    threshold : float, optional, default=0
        Optimize the difference to this value
    radius : float, optional, default=5.0
        Max distance to search for neighbor atoms from targets.
    method : str, optional, default=area
        Which method should be used. Both methods compute the surface
        of the solvated molecule. `area` returns the surface area of such
        surface, while `volume` returns the volume occupied by the model.

    Returns
    -------
    float
        Absolute difference between ``threshold`` and the surface area
        (if method=area) or the enclosed volume (if method=volume) of
        the computed surface.
    """

    _validate = {
        parse.Required('targets'): [parse.Molecule_name],
        'method': parse.In(['volume', 'area']),
        'radius': parse.All(parse.Coerce(float), parse.Range(min=0)),
        'threshold': parse.All(parse.Coerce(float), parse.Range(min=0)),
    }

    def __init__(self,
                 targets=None,
                 threshold=0.0,
                 radius=5.0,
                 method='area',
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self._targets = targets
        self.threshold = threshold
        self.radius = radius
        self.method = method
        # Anything other than 'area' falls back to the volume scorer
        self.evaluate = (self.evaluate_area
                         if method == 'area' else self.evaluate_volume)

    def targets(self, ind):
        """Chimera molecules of the configured target genes."""
        found = []
        for target_name in self._targets:
            found.append(ind.find_molecule(target_name).compound.mol)
        return found

    def molecules(self, ind):
        """Chimera molecules of every molecule gene in ``ind``, as a tuple."""
        return tuple(gene.compound.mol for gene in ind._molecules.values())

    def surface(self, ind):
        """Surface computed by ``grid_sas_surface`` on the zone atoms."""
        selected_atoms = self.zone_atoms(self.targets(ind),
                                         self.molecules(ind))
        return grid_sas_surface(selected_atoms)

    def evaluate_area(self, ind):
        """Absolute deviation of the surface area from ``threshold``."""
        area = surface_area(*self.surface(ind))
        return abs(area - self.threshold)

    def evaluate_volume(self, ind):
        """Absolute deviation of the enclosed volume from ``threshold``."""
        volume = enclosed_volume(*self.surface(ind))[0]
        return abs(volume - self.threshold)

    def zone_atoms(self, probes, molecules):
        """
        Atoms of ``probes`` plus, when ``radius`` is non-zero, every atom
        of ``molecules`` within ``radius`` of them.
        """
        self.zone.clear()
        seed_atoms = []
        for probe in probes:
            for atom in probe.atoms:
                seed_atoms.append(atom)
        self.zone.add(seed_atoms)
        if self.radius:
            neighborhood = chimera.specifier.zone(self.zone, 'atom', None,
                                                  self.radius, molecules)
            self.zone.merge(chimera.selection.REPLACE, neighborhood)
        return self.zone.atoms()
Ejemplo n.º 5
0
class Energy(ObjectiveProvider):
    """
    Calculate the potential energy of a system with OpenMM

    Parameters
    ----------
    targets : list of str, default=None
        If set, which molecules should be evaluated. Else, all will be
        evaluated.
    forcefields : list of str, default=('amber99sbildn.xml',)
        Which forcefields to use
    auto_parametrize : list of str, default=None
        Names of the molecule genes GAUDI should try to auto parametrize
        with antechamber. The ``path`` of each matching gene in the
        configuration is used as input file.
    parameters : list of 2-item list of str
        List of (gaff.mol2, .frcmod) files to use as parametrization
        source.
    platform : str
        Which platform to use for calculations. Choose between CPU, CUDA,
        OpenCL.

    Returns
    -------
    float
        The estimated potential energy, in kJ/mol

    """

    _validate = {
        'targets': [parse.Molecule_name],
        'forcefields': [
            parse.Any(parse.ExpandUserPathExists,
                      parse.In(_openmm_builtin_forcefields))
        ],
        'auto_parametrize': [parse.Molecule_name],
        'parameters':
        [parse.All([parse.ExpandUserPathExists], parse.Length(min=2, max=2))],
        'platform':
        parse.In(['CUDA', 'OpenCL', 'CPU'])
    }

    def __init__(self,
                 targets=None,
                 forcefields=('amber99sbildn.xml', ),
                 auto_parametrize=None,
                 parameters=None,
                 platform=None,
                 *args,
                 **kwargs):
        # Never work with fewer than 6 decimals of precision
        if kwargs.get('precision', 6) < 6:
            kwargs['precision'] = 6
        ObjectiveProvider.__init__(self, **kwargs)
        self.auto_parametrize = auto_parametrize
        self._targets = targets
        self._parameters = parameters
        self.platform = platform
        self.topology = None
        self._simulation = None

        additional_ffxml = []
        if parameters:
            # [(mol2, frcmod), ...] -> (mol2s, frcmods)
            additional_ffxml.append(create_ffxml_file(*zip(*parameters)))
        if auto_parametrize:
            filenames = [
                g.path for m in auto_parametrize
                for g in self.environment.cfg.genes if g.name == m
            ]
            additional_ffxml.append(self._gaff2xml(*filenames))

        self._forcefields = tuple(forcefields) + tuple(additional_ffxml)
        self.forcefield = openmm_app.ForceField(*self._forcefields)

    def evaluate(self, individual):
        """
        Calculates the energy of current individual

        Notes
        -----
        For static calculations, where molecules are essentially always
        the same, but with different coordinates, we only need to generate
        topologies once. However, for dynamic jobs, with potentially
        different molecules involved each time, we cannot guarantee having
        the same topology. As a result, we generate it again for each
        evaluation.
        """
        molecules = self.molecules(individual)
        coordinates = self.chimera_molecule_to_openmm_positions(*molecules)

        # Build topology if it's first time or a dynamic job
        if self.topology is None or not self._gaudi_is_static(individual):
            self.topology = self.chimera_molecule_to_openmm_topology(
                *molecules)
            self._simulation = None  # This forces a Simulation rebuild

        return self.calculate_energy(coordinates)

    def molecules(self, individual):
        """
        Chimera molecules to evaluate: the configured targets, or every
        molecule of ``individual`` when no targets were given.
        """
        if self._targets is None:
            return [m.compound.mol for m in individual._molecules.values()]
        return [
            individual.find_molecule(t).compound.mol for t in self._targets
        ]

    @property
    def simulation(self):
        """
        Build a new OpenMM simulation if not yet defined and return it

        Notes
        -----
        self.topology must be defined previously!
        Use self.chimera_molecule_to_openmm_topology to set it.

        """
        if self._simulation is None:
            system = self.forcefield.createSystem(
                self.topology,
                nonbondedMethod=openmm_app.CutoffNonPeriodic,
                nonbondedCutoff=1.0 * unit.nanometers,
                rigidWater=True,
                constraints=None)
            integrator = openmm.VerletIntegrator(0.001)
            # Optional positional argument: only forwarded when a
            # platform was explicitly requested.
            platform_args = ()
            if self.platform is not None:
                platform_args = (
                    openmm.Platform.getPlatformByName(self.platform), )
            self._simulation = openmm_app.Simulation(self.topology, system,
                                                     integrator,
                                                     *platform_args)
        return self._simulation

    def calculate_energy(self, coordinates):
        """
        Set the positions of the (lazily built) OpenMM simulation
        and return the potential energy of that state

        Parameters
        ----------
        coordinates : simtk.unit.Quantity
            Positions of the atoms in the system

        Returns
        -------
        potential_energy : float
            Potential energy of the system, in kJ/mol
        """
        context = self.simulation.context
        context.setPositions(coordinates)
        state = context.getState(getEnergy=True)
        # Convert explicitly through the public API instead of reading the
        # private Quantity._value attribute.
        return state.getPotentialEnergy().value_in_unit(
            unit.kilojoules_per_mole)

    @staticmethod
    def chimera_molecule_to_openmm_topology(*molecules):
        """
        Convert Chimera Molecule objects to a single OpenMM topology.

        Chains are kept separate per molecule: the chain key is the
        (molecule index, chain ID) pair.

        Parameters
        ----------
        molecules : chimera.Molecule
            One or more molecules, passed as positional arguments

        Returns
        -------
        topology : simtk.openmm.app.topology.Topology

        """
        atoms, residues, chains = {}, {}, {}
        topology = openmm_app.Topology()
        for i, mol in enumerate(molecules):
            for a in mol.atoms:
                r = a.residue
                chain_id = (i, r.id.chainId)
                if chain_id not in chains:
                    chains[chain_id] = topology.addChain()
                chain = chains[chain_id]

                if r not in residues:
                    residues[r] = topology.addResidue(r.type, chain)
                residue = residues[r]

                element = openmm_app.Element.getByAtomicNumber(
                    a.element.number)
                atoms[a] = topology.addAtom(a.name, element, residue,
                                            a.serialNumber)

            for b in mol.bonds:
                topology.addBond(atoms[b.atoms[0]], atoms[b.atoms[1]])

        return topology

    @staticmethod
    def chimera_molecule_to_openmm_positions(*molecules):
        """
        Concatenate the positions of all atoms of ``molecules`` (using
        each molecule's ``openState.xform``) into a single Quantity
        expressed in angstroms.
        """
        positions = [
            atom_positions(m.atoms, m.openState.xform) for m in molecules
        ]
        all_positions = numpy.concatenate(positions)
        return unit.Quantity(all_positions, unit=unit.angstrom)

    @staticmethod
    def _gaff2xml(*filenames, **kwargs):
        """
        Use OpenMolTools wrapper to run antechamber programmatically
        and auto parametrize requested molecules.

        Parameters
        ----------
        filenames : list of str
            List of the filenames of the molecules to parametrize
        **kwargs
            Forwarded to ``run_antechamber``

        Returns
        -------
        ffxmls : StringIO
            Compiled ffxml file produced by antechamber and openmoltools
            converter
        """
        import os

        frcmods, gaffmol2s = [], []
        for filename in filenames:
            # splitext only strips a real extension of the last path
            # component, so extension-less files and dotted directory
            # names are handled correctly.
            name = os.path.splitext(filename)[0]
            gaffmol2, frcmod = run_antechamber(name, filename, **kwargs)
            frcmods.append(frcmod)
            gaffmol2s.append(gaffmol2)
        return create_ffxml_file(gaffmol2s, frcmods)

    def _gaudi_is_static(self, individual):
        """
        Check whether this job can perform topology changes.

        Genes that are considered able to change topologies:
            - genes whose class is named ``Mutamers``, with mutations ON
            - genes whose class is named ``Molecule``, with more than one
              entry in their catalog (block building enabled)

        Parameters
        ----------
        individual : gaudi.base.Individual
            The individual to be analyzed for dynamic behaviour

        Returns
        -------
        bool
            True if no gene can change the topology, False otherwise
        """
        for gene in individual.genes.values():
            gene_type = gene.__class__.__name__
            if gene_type == 'Mutamers' and gene.mutations:
                return False
            if gene_type == 'Molecule' and len(gene.catalog) > 1:
                return False

        return True
Ejemplo n.º 6
0
class Gold(ObjectiveProvider):
    """
    Gold class

    Parameters
    ----------
    protein : str
        The name of molecule acting as protein
    ligand : str
        The name of molecule acting as ligand
    scoring : str, optional, defaults to chemscore
        Fitness function to use. Choose between chemscore, chemplp,
        goldscore and asp.
    score_component : str, optional, defaults to 'Score'
        Scoring fields to parse out of the rescore.log file, such as
        Score, DG, S(metal), etc.
    radius : float, optional, defaults to 10.0
        Radius (in A) of binding site sphere, the origin of which is
        automatically centered at the ligand's center of mass.

    Returns
    -------
    float
        Interaction energy as reported by GOLD's chosen scoring function
    """
    _validate = {
        parse.Required('protein'): parse.Molecule_name,
        parse.Required('ligand'): parse.Molecule_name,
        'scoring': parse.In(['chemscore', 'chemplp', 'goldscore', 'asp']),
        'radius': parse.Coerce(float),
        'score_component': str,
    }

    def __init__(self,
                 protein='Protein',
                 ligand='Ligand',
                 scoring='chemscore',
                 score_component='Score',
                 radius=10,
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.protein_names = [protein]
        self.ligand_names = [ligand]
        self.scoring = scoring
        self.score_component = score_component
        self.radius = radius
        self.executable = find_executable('gold_auto')
        if self.executable is None:
            sys.exit(
                'GOLD could not be found in $PATH. Is it (correctly) installed?'
            )
        # Remember where we started: evaluate() changes directory and
        # must be able to come back.
        self._oldworkingdir = os.getcwd()
        # Temporary files written for the current evaluation; clean()
        # deletes every path registered here.
        self._paths = {}
        # Prefer /dev/shm when it exists on POSIX systems
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Get a molecule gene instance of individual by its name
        """
        for name in names:
            yield ind.find_molecule(name)

    def evaluate(self, ind):
        """
        Run GOLD in a subprocess on the configured protein and ligand,
        and parse the score out of the ``rescore.log`` it leaves in the
        temporary directory. Clean tmp files at exit.

        Parameters
        ----------
        ind : gaudi.base.Individual
            Individual providing the protein and ligand molecules

        Returns
        -------
        float
            The requested score component, or ``-100000 * self.weight``
            if GOLD exits with an error, the log cannot be opened, or
            the log contains no valid score
        """
        self.tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        proteins = list(self.get_molecule_by_name(ind, *self.protein_names))
        ligands = list(self.get_molecule_by_name(ind, *self.ligand_names))

        protein_path = self.prepare_proteins(proteins)
        ligand_path = self.prepare_ligands(ligands)
        origin = self.origin(ligands[0])
        command = self.prepare_command(protein_path, ligand_path, origin)

        try:
            os.chdir(self.tmpdir)
            # check_call, unlike call, raises CalledProcessError on a
            # non-zero exit status, so a failed run is never scored from
            # whatever rescore.log happens to be in the directory.
            subprocess.check_call(command)
            score = self.parse_output('rescore.log')
        except (subprocess.CalledProcessError, IOError):
            logger.warning("Could not run GOLD with command %s", command)
            return -100000 * self.weight
        finally:
            self.clean()
            os.chdir(self._oldworkingdir)

        if score is None:
            # The log had no 'Ok' row: report the same penalty as a
            # failed run instead of handing None to the caller.
            logger.warning("GOLD did not report a valid score in rescore.log")
            return -100000 * self.weight
        return score

    def prepare_proteins(self, proteins):
        """
        Write all ``proteins`` to a single PDB file next to
        ``self.tmpfile`` and register it for cleanup.

        Note that the last item is popped from ``proteins``.

        Returns
        -------
        str
            Path of the written PDB file
        """
        proteinpath = '{}_proteins.pdb'.format(self.tmpfile)
        last_protein = proteins.pop()
        last_protein.write(absolute=proteinpath,
                           combined_with=proteins,
                           filetype='pdb')
        self._paths['proteins'] = proteinpath
        return proteinpath

    def prepare_ligands(self, ligands):
        """
        Write all ``ligands`` to a single mol2 file next to
        ``self.tmpfile`` and register it for cleanup.

        Returns
        -------
        str
            Path of the written mol2 file
        """
        ligandpath = '{}_ligands.mol2'.format(self.tmpfile)
        ligand_mols = [lig.compound.mol for lig in ligands]

        writeMol2(ligand_mols,
                  ligandpath,
                  temporary=True,
                  multimodelHandling='combined')
        self._paths['ligands'] = ligandpath
        return ligandpath

    def origin(self, molecule):
        """
        Compute the mass-weighted average of the atom coordinates of
        ``molecule`` (its center of mass), used as binding site origin.
        """
        molecule = molecule.compound.mol
        coordinates = atom_positions(molecule.atoms, molecule.openState.xform)
        masses = np.fromiter((a.element.mass for a in molecule.atoms),
                             dtype='float32',
                             count=molecule.numAtoms)
        return np.average(coordinates, axis=0, weights=masses)

    def prepare_command(self, protein_path, ligand_path, origin):
        """
        Fill the GOLD configuration template, write it to
        ``<tmpfile>.conf`` (registered for cleanup) and build the
        command line that runs GOLD on it.

        Returns
        -------
        list of str
            Executable path followed by the configuration file path
        """
        replaces = dict(PROTEIN=protein_path,
                        LIGAND=ligand_path,
                        ORIGIN='{} {} {}'.format(*origin),
                        SCORING=self.scoring,
                        RADIUS=self.radius)
        inputfile = _TEMPLATE.safe_substitute(replaces)
        inputfilepath = self.tmpfile + '.conf'
        with open(inputfilepath, 'w') as f:
            f.write(inputfile)
        self._paths['conf'] = inputfilepath
        return [self.executable, inputfilepath]

    def parse_output(self, filename):
        """
        Read the requested score component out of a GOLD rescore log.

        The column is located by looking up ``self.score_component`` in
        the row whose first field is ``Status``; the value is read from
        the first row whose first field is ``Ok``. If no ``Status`` row
        precedes it, column 4 is used.

        Returns
        -------
        float or None
            The parsed score, or None if the file has no ``Ok`` row

        Raises
        ------
        ValueError
            If ``self.score_component`` is not listed in the ``Status`` row
        """
        column = 4
        with open(filename) as f:
            for line in f:
                fields = line.split()
                if not fields:  # blank line
                    continue
                if fields[0] == 'Status':
                    column = fields.index(self.score_component)
                elif fields[0] == 'Ok':
                    return float(fields[column])
        return None

    def clean(self):
        """
        Delete every temporary file registered in ``self._paths``.
        """
        for p in self._paths.values():
            os.remove(p)
        self._paths.clear()
Ejemplo n.º 7
0
class Distance(ObjectiveProvider):
    """
    Distance class

    Parameters
    ----------
    threshold : float
        Optimum distance to meet, or the string 'covalent' to use the
        bond length Chimera reports for each probe/target element pair
        (requires `target` to be an atom)
    tolerance : float
        Maximum deviation from threshold that is not penalized
    target : str
        The atom to measure the distance to, expressed as
        <molecule name>/<atom serial>, or a set of coordinates
    probes : list of str
        The atoms whose distance to `target` is being measured,
        expressed as <molecule name>/<atom serial>. If more than one
        is provided, the average of all of them is returned
    center_of_mass : bool
        If truthy, return the distance between `target` and the
        mass-weighted center of all `probes` instead

    Returns
    -------
    float
        (Mean of) absolute deviation from threshold distance, in A.
    """
    _validate = {
        parse.Required('probes'): parse.AssertList(parse.Named_spec("molecule", "atom")),
        parse.Required('target'): parse.Any(parse.Named_spec("molecule", "atom"),
                                            parse.Coordinates),
        parse.Required('threshold'): parse.Any(parse.Coerce(float), parse.In(['covalent'])),
        'tolerance': parse.Coerce(float),
        # NOTE(review): documented as bool but coerced to float; only its
        # truthiness is used below -- confirm this is intended.
        'center_of_mass': parse.Coerce(float)
    }

    def __init__(self, threshold=None, tolerance=None, target=None, probes=None,
                 center_of_mass=False, *args, **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.threshold = threshold
        self.tolerance = tolerance
        self.center_of_mass = center_of_mass
        self._probes = probes
        self._target = target
        # Bind the `evaluate` entry point to the requested strategy
        if self.center_of_mass:
            self.evaluate = self.evaluate_center_of_mass
        else:
            self.evaluate = self.evaluate_distances

    def atoms(self, ind, *targets):
        """
        Yield every atom of ``ind`` matched by the given
        (molecule name, atom serial) pairs.
        """
        for target in targets:
            mol, serial = target
            for atom in ind.find_molecule(mol).find_atoms(serial):
                yield atom

    def _resolve_target(self, ind):
        """
        Locate the configured target in ``ind``.

        Returns
        -------
        tuple
            ``(atom, point)``. ``atom`` is None when the target was
            given as raw coordinates instead of an atom spec.
        """
        if isinstance(self._target[0], basestring):  # AtomSpec like 'Molecule/1'
            atom = ind.find_molecule(self._target.molecule
                   ).find_atom(self._target.atom)
            return atom, atom.xformCoord()
        return None, chimera.Point(*self._target)  # coordinates

    def evaluate_distances(self, ind):
        """
        Measure the distance between each probe and the target and
        return the mean absolute deviation from the threshold.

        Raises
        ------
        ValueError
            If ``threshold`` is 'covalent' but the target was given as
            coordinates, since a bond length needs the target's element
        """
        target_atom, target = self._resolve_target(ind)
        if self.threshold == 'covalent' and target_atom is None:
            raise ValueError("threshold 'covalent' requires `target` to be "
                             "an atom, not coordinates")
        distances = []
        for a in self.atoms(ind, *self._probes):
            d = self._distance(a, target)
            if self.threshold == 'covalent':
                # The element lives on the atom, not on its coordinates
                threshold = chimera.Element.bondLength(a.element,
                                                       target_atom.element)
            else:
                threshold = self.threshold
            d = d - threshold
            # Deviations below `tolerance` are replaced by a fixed value of
            # -1000 * weight (its absolute value is what gets averaged).
            # NOTE(review): the class docstring describes tolerance as the
            # unpenalized deviation -- confirm this branch matches the intent.
            if self.tolerance is not None and d < self.tolerance:
                distances.append(-1000 * self.weight)
            else:
                distances.append(d)

        return numpy.mean(numpy.absolute(distances))

    def evaluate_center_of_mass(self, ind):
        """
        Return the distance between the target and the mass-weighted
        center of all probe atoms. The threshold is not used here.
        """
        _, target = self._resolve_target(ind)
        probes = list(self.atoms(ind, *self._probes))
        center_of_mass = self._center(*probes)

        return target.distance(chimera.Point(*center_of_mass))

    @staticmethod
    def _distance(atom, target):
        """
        Distance between the transformed coordinates of ``atom`` and the
        point ``target``.
        """
        return atom.xformCoord().distance(target)

    @staticmethod
    def _center(*atoms):
        """
        Average of the transformed coordinates of ``atoms``, weighted by
        the mass of each atom's element.
        """
        coords, masses = [], []
        for a in atoms:
            coords.append(a.xformCoord())
            masses.append(a.element.mass)

        return numpy.average(coords, axis=0, weights=masses)
Ejemplo n.º 8
0
class LigScore(ObjectiveProvider):
    """
    LigScore class

    Parameters
    ----------
    proteins : list of str
        The name of molecules that are acting as proteins
    ligands : list of str
        The name of molecules that are acting as ligands
    method : str, optional, defaults to 'pose'
        Either 'rank' or 'pose'. Passed to the executable as a
        ``--<method>`` flag
    binary : str, optional
        Path to ligand_score executable
    library : str, optional
        Path to LigScore lib file

    Returns
    -------
    float
        Interaction energy as reported by IMP's ligand_score.
    """
    _validate = {
        parse.Required('proteins'): [parse.Molecule_name],
        parse.Required('ligands'): [parse.Molecule_name],
        'method': parse.In(['rank', 'pose']),
        'binary': parse.ExpandUserPathExists,
        'library': parse.ExpandUserPathExists,
    }

    def __init__(self,
                 proteins=('Protein', ),
                 ligands=('Ligand', ),
                 method='pose',
                 binary=None,
                 library=None,
                 *args,
                 **kwargs):
        ObjectiveProvider.__init__(self, **kwargs)
        self.protein_names = proteins
        self.ligand_names = ligands
        # Fall back to looking up 'ligand_score' when no path is given
        self.binary = find_executable(
            'ligand_score') if binary is None else binary
        self.library = library
        self.method = method

        # Remember where we started: evaluate() changes directory and
        # must be able to come back.
        self._oldworkingdir = os.getcwd()
        # Temporary files written for the current evaluation; clean()
        # deletes every path registered here.
        self._paths = {}
        # Prefer /dev/shm when it exists on POSIX systems
        if os.name == 'posix' and os.path.exists('/dev/shm'):
            self.tmpdir = '/dev/shm'
        else:
            self.tmpdir = default_tempdir()

    def get_molecule_by_name(self, ind, *names):
        """
        Get a molecule gene instance of individual by its name
        """
        for name in names:
            yield ind.find_molecule(name)

    def evaluate(self, ind):
        """
        Run a subprocess calling LigScore binary with provided options,
        and parse the results. Clean tmp files at exit.

        Returns
        -------
        float
            The parsed score, or ``-100000 * self.weight`` if the
            executable exits with a non-zero status
        """
        self.tmpfile = os.path.join(self.tmpdir, next(tempnames()))
        proteins = list(self.get_molecule_by_name(ind, *self.protein_names))
        ligands = list(self.get_molecule_by_name(ind, *self.ligand_names))

        protein_path = self.prepare_proteins(proteins)
        ligand_path = self.prepare_ligands(ligands)
        command = self.prepare_command(protein_path, ligand_path)

        try:
            os.chdir(self.tmpdir)
            stream = subprocess.check_output(command, universal_newlines=True)
        except subprocess.CalledProcessError:
            logger.warning("Could not run LigScore with command %s", command)
            return -100000 * self.weight
        else:
            return self.parse_output(stream)
        finally:
            self.clean()
            os.chdir(self._oldworkingdir)

    def prepare_proteins(self, proteins):
        """
        Write all ``proteins`` to a single PDB file next to
        ``self.tmpfile`` and register it for cleanup.

        Note that the last item is popped from ``proteins``.

        Returns
        -------
        str
            Path of the written PDB file
        """
        proteinpath = '{}_proteins.pdb'.format(self.tmpfile)
        last_protein = proteins.pop()
        last_protein.write(absolute=proteinpath,
                           combined_with=proteins,
                           filetype='pdb')
        self._paths['proteins'] = proteinpath
        return proteinpath

    def prepare_ligands(self, ligands):
        """
        Write all ``ligands`` to a single mol2 file next to
        ``self.tmpfile`` and register it for cleanup.

        Returns
        -------
        str
            Path of the written mol2 file
        """
        ligandpath = '{}_ligands.mol2'.format(self.tmpfile)
        ligand_mols = [lig.compound.mol for lig in ligands]

        writeMol2(ligand_mols,
                  ligandpath,
                  temporary=True,
                  multimodelHandling='combined')
        self._paths['ligands'] = ligandpath
        return ligandpath

    def prepare_command(self, protein_path, ligand_path):
        """
        Build the argument list for the executable: binary, ``--<method>``
        flag, ligand file, protein file and, if set, the library file.

        Returns
        -------
        list of str
        """
        cmd = [self.binary, '--' + self.method, ligand_path, protein_path]
        if self.library:
            cmd.append(self.library)
        # A real list (not a lazy map) so the command can be both executed
        # and printed in the warning log on any Python version.
        return [str(arg) for arg in cmd]

    def parse_output(self, stream):
        """
        Get last word of first line (and unique) and parse it into float

        Raises
        ------
        IndexError
            If ``stream`` has no lines or its first line is blank
        ValueError
            If the last word is not a number
        """
        return float(stream.splitlines()[0].split()[-1])

    def clean(self):
        """
        Delete every temporary file registered in ``self._paths``.
        """
        for p in self._paths.values():
            os.remove(p)
        self._paths.clear()