Example #1
0
def generate_fragments(inputf, output_dir, pdf=False, combinatorial=True, MAX_ROTORS=2, strict_stereo=True, remove_map=True):
    """
    This function generates fragment SMILES files sorted by rotatable bonds from an input molecule file.
    The output .smi files are written out to `output_dir` and named `nrotor_n.smi` where n corresponds to the number
    of rotatable bonds for all fragments in the file.

    Parameters
    ----------
    inputf: str
        absolute path to input molecule file
    output_dir: str
        absolute path to output directory
    pdf: bool
        If true, visualization of the fragments will be written to pdf files named `<molecule title>.pdf`. The name is
        a relative path, so the pdf is presumably written to the directory this function is run from.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    strict_stereo: bool
        forwarded to `_generate_fragments`
    remove_map: bool
        If true, atom map indices are reset to 0 on every atom before fragmenting, which makes it easier to find
        duplicate fragments.

    Raises
    ------
    IOError
        If `inputf` cannot be opened for reading.
    """
    ifs = oechem.oemolistream()
    # Fail loudly instead of silently producing empty output files for an unreadable input.
    if not ifs.open(inputf):
        raise IOError('Unable to open {} for reading'.format(inputf))

    smiles_unique = set()
    mol = oechem.OEMol()
    try:
        while oechem.OEReadMolecule(ifs, mol):
            openeye.normalize_molecule(mol)
            logger().info('fragmenting {}...'.format(mol.GetTitle()))
            if remove_map:
                # Remove tags from smiles. This is done to make it easier to find duplicate fragments
                for a in mol.GetAtoms():
                    a.SetMapIdx(0)
            result = _generate_fragments(mol, strict_stereo=strict_stereo)
            if not result:
                # _generate_fragments returns a falsy value when the molecule could not be fragmented
                logger().warning('Skipping {}, SMILES: {}'.format(mol.GetTitle(), oechem.OECreateSmiString(mol)))
                continue
            charged, frags = result[0], result[-1]
            if combinatorial:
                smiles = smiles_with_combined(frags, charged, MAX_ROTORS=MAX_ROTORS)
            else:
                smiles = frag_to_smiles(frags, charged)

            smiles_unique.update(smiles.keys())
            if pdf:
                oname = '{}.pdf'.format(mol.GetTitle())
                ToPdf(charged, oname, frags)
            del charged, frags
    finally:
        ifs.close()

    # Generate oedatabase for all fragments
    # Base name = file name without directory and without its final extension; robust to paths that have no
    # extension or that contain dots in directory names.
    base = os.path.splitext(os.path.basename(inputf))[0]
    ofname = base + '_frags'
    utils.to_smi(list(smiles_unique), output_dir, ofname)
    ofname_ext = ofname + '.smi'
    oedb_name = os.path.join(output_dir, ofname_ext)
    utils.create_oedatabase_idxfile(oedb_name)
    _sort_by_rotbond(oedb_name, outdir=output_dir)
Example #2
0
def _generate_fragments(mol, strict_stereo=True):
    """
    This function generates fragments from a molecule, one fragment per rotatable bond.

    Parameters
    ----------
    mol: OEMol
        molecule to fragment
    strict_stereo: bool
        passed to `openeye.get_charges` as `strictStereo`

    Returns
    -------
    charged: charged OEMOl
    frags: dict of AtomBondSet mapped to rotatable bond index the fragment was built up from.

    `False` is returned instead of the `(charged, frags)` tuple when the first bond of the charged molecule carries
    no 'WibergBondOrder' data.
    """

    charged = openeye.get_charges(mol,
                                  keep_confs=1,
                                  strictStereo=strict_stereo)

    # Check if WBO were calculated. Only the first bond is probed, so stop after one iteration instead of
    # materialising every bond into a list.
    for first_bond in charged.GetBonds():
        try:
            first_bond.GetData('WibergBondOrder')
        except ValueError:
            logger().warning(
                "WBO were not calculate. Cannot fragment molecule {}".format(
                    charged.GetTitle()))
            return False
        break

    tagged_rings, tagged_fgroups = tag_molecule(charged)

    # Iterate over bonds and build one fragment around every rotatable bond
    frags = {}
    for bond in charged.GetBonds():
        if bond.IsRotor():
            frag_atoms, frag_bonds = _build_frag(bond=bond,
                                                 mol=charged,
                                                 tagged_fgroups=tagged_fgroups,
                                                 tagged_rings=tagged_rings)
            frags[bond.GetIdx()] = _to_AtomBondSet(charged, frag_atoms, frag_bonds)

    return charged, frags
Example #3
0
def to_oemol(filename, title=True, verbose=True):
    """Create OEMol from file. If more than one mol in file, return list of OEMols.

    Parameters
    ----------
    filename: str
        absolute path to the molecule file
    title: bool
        If truthy, the title stored on each molecule in the file is passed to `normalize_molecule`. Otherwise the
        given value is passed through unchanged (per the original documentation a None title presumably makes
        `normalize_molecule` assign the IUPAC name -- confirm against that helper).
    verbose: bool
        If True, log the file being loaded and each molecule title being read.

    Returns
    -------
    mollist: list
        list of OEMol for multiple molecules. OEMol if file only has one molecule. An empty list if no molecule
        could be read from the file.

    Raises
    ------
    Exception
        If `filename` does not exist.
    """

    if not os.path.exists(filename):
        raise Exception("File {} not found".format(filename))
    if verbose:
        logger().info("Loading molecules from {}".format(filename))

    ifs = oechem.oemolistream(filename)
    mollist = []

    try:
        molecule = oechem.OECreateOEGraphMol()
        while oechem.OEReadMolecule(ifs, molecule):
            molecule_copy = oechem.OEMol(molecule)
            # Keep the per-molecule title in its own variable: overwriting the `title` flag would let one
            # molecule with an empty title switch off title handling for every molecule after it.
            mol_title = title
            if title:
                mol_title = molecule_copy.GetTitle()
                if verbose:
                    logger().info("Reading molecule {}".format(mol_title))

            mollist.append(normalize_molecule(molecule_copy, mol_title))
    finally:
        # Always release the stream, even if reading or normalisation fails
        ifs.close()

    # Unwrap only when there is exactly one molecule; an empty result stays an empty list
    if len(mollist) == 1:
        return mollist[0]

    return mollist
Example #4
0
def to_oemol(filename, title=True, verbose=True):
    """Create OEMol from file. If more than one mol in file, return list of OEMols.

    Parameters
    ----------
    filename: str
        absolute path to the molecule file
    title: bool
        If truthy, the title stored on each molecule in the file is passed to `normalize_molecule`. Otherwise the
        given value is passed through unchanged (per the original documentation a None title presumably makes
        `normalize_molecule` assign the IUPAC name -- confirm against that helper).
    verbose: bool
        If True, log the file being loaded and each molecule title being read.

    Returns
    -------
    mollist: list
        list of OEMol for multiple molecules. OEMol if file only has one molecule. An empty list if no molecule
        could be read from the file.

    Raises
    ------
    Exception
        If `filename` does not exist.
    """

    if not os.path.exists(filename):
        raise Exception("File {} not found".format(filename))
    if verbose:
        logger().info("Loading molecules from {}".format(filename))

    ifs = oechem.oemolistream(filename)
    mollist = []

    try:
        molecule = oechem.OECreateOEGraphMol()
        while oechem.OEReadMolecule(ifs, molecule):
            molecule_copy = oechem.OEMol(molecule)
            # Keep the per-molecule title in its own variable: overwriting the `title` flag would let one
            # molecule with an empty title switch off title handling for every molecule after it.
            mol_title = title
            if title:
                mol_title = molecule_copy.GetTitle()
                if verbose:
                    logger().info("Reading molecule {}".format(mol_title))

            mollist.append(normalize_molecule(molecule_copy, mol_title))
    finally:
        # Always release the stream, even if reading or normalisation fails
        ifs.close()

    # Unwrap only when there is exactly one molecule; an empty result stays an empty list
    if len(mollist) == 1:
        return mollist[0]

    return mollist
Example #5
0
def _generate_fragments(mol, strict_stereo=True):
    """
    This function generates fragments from a molecule, one fragment per rotatable bond.

    Parameters
    ----------
    mol: OEMol
        molecule to fragment
    strict_stereo: bool
        passed to `openeye.get_charges` as `strictStereo`

    Returns
    -------
    charged: charged OEMOl
    frags: dict of AtomBondSet mapped to rotatable bond index the fragment was built up from.

    `False` is returned instead of the `(charged, frags)` tuple when the first bond of the charged molecule carries
    no 'WibergBondOrder' data.
    """

    charged = openeye.get_charges(mol, keep_confs=1, strictStereo=strict_stereo)

    # Check if WBO were calculated. Only the first bond is probed, so stop after one iteration instead of
    # materialising every bond into a list.
    for first_bond in charged.GetBonds():
        try:
            first_bond.GetData('WibergBondOrder')
        except ValueError:
            logger().warning("WBO were not calculate. Cannot fragment molecule {}".format(charged.GetTitle()))
            return False
        break

    tagged_rings, tagged_fgroups = tag_molecule(charged)

    # Iterate over bonds and build one fragment around every rotatable bond
    frags = {}
    for bond in charged.GetBonds():
        if bond.IsRotor():
            frag_atoms, frag_bonds = _build_frag(bond=bond, mol=charged, tagged_fgroups=tagged_fgroups,
                                                 tagged_rings=tagged_rings)
            frags[bond.GetIdx()] = _to_AtomBondSet(charged, frag_atoms, frag_bonds)

    return charged, frags
Example #6
0
def get_atom_map(tagged_smiles, molecule=None):
    """
    Returns a dictionary that maps tag on SMILES to atom index in molecule.

    Parameters
    ----------
    tagged_smiles: str
        index-tagged explicit hydrogen SMILES string
    molecule: OEMol
        molecule to generate map for. If None, a new OEMol will be generated from the tagged SMILES, the map will map to
        this molecule and it will be returned.

    Returns
    -------
    molecule: OEMol
        Only returned (as the first element of a `(molecule, atom_map)` tuple) if a molecule was not provided.
    atom_map: dict
        a dictionary that maps tag to atom index {tag:idx}

    `False` is returned if the substructure search finds no match.
    """
    # Record whether the caller supplied a molecule *before* the name is rebound; testing `molecule is None`
    # after the assignment below can never be true.
    return_molecule = molecule is None
    if return_molecule:
        molecule = openeye.smiles_to_oemol(tagged_smiles)

    ss = oechem.OESubSearch(tagged_smiles)
    oechem.OEPrepareSearch(molecule, ss)
    ss.SetMaxMatches(1)

    t1 = time.time()
    matches = [m for m in ss.Match(molecule)]
    t2 = time.time()
    seconds = t2 - t1
    logger().info("Substructure search took {} seconds".format(seconds))
    if not matches:
        logger().info("MCSS failed for {}, smiles: {}".format(
            molecule.GetTitle(), tagged_smiles))
        return False

    atom_map = {}
    for match in matches:
        for ma in match.GetAtoms():
            atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()

    # sanity check: log the SMILES of the matched subset (the search is capped at one match, so the last match
    # is the only one)
    mol = oechem.OEGraphMol()
    oechem.OESubsetMol(mol, matches[-1], True)
    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))
    if return_molecule:
        return molecule, atom_map

    return atom_map
Example #7
0
def get_atom_map(tagged_smiles, molecule=None):
    """
    Returns a dictionary that maps tag on SMILES to atom index in molecule.

    Parameters
    ----------
    tagged_smiles: str
        index-tagged explicit hydrogen SMILES string
    molecule: OEMol
        molecule to generate map for. If None, a new OEMol will be generated from the tagged SMILES, the map will map to
        this molecule and it will be returned.

    Returns
    -------
    molecule: OEMol
        Only returned (as the first element of a `(molecule, atom_map)` tuple) if a molecule was not provided.
    atom_map: dict
        a dictionary that maps tag to atom index {tag:idx}

    `False` is returned if the substructure search finds no match.
    """
    # Record whether the caller supplied a molecule *before* the name is rebound; testing `molecule is None`
    # after the assignment below can never be true.
    return_molecule = molecule is None
    if return_molecule:
        molecule = openeye.smiles_to_oemol(tagged_smiles)

    ss = oechem.OESubSearch(tagged_smiles)
    oechem.OEPrepareSearch(molecule, ss)
    ss.SetMaxMatches(1)

    t1 = time.time()
    matches = [m for m in ss.Match(molecule)]
    t2 = time.time()
    seconds = t2-t1
    logger().info("Substructure search took {} seconds".format(seconds))
    if not matches:
        logger().info("MCSS failed for {}, smiles: {}".format(molecule.GetTitle(), tagged_smiles))
        return False

    atom_map = {}
    for match in matches:
        for ma in match.GetAtoms():
            atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()

    # sanity check: log the SMILES of the matched subset (the search is capped at one match, so the last match
    # is the only one)
    mol = oechem.OEGraphMol()
    oechem.OESubsetMol(mol, matches[-1], True)
    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))
    if return_molecule:
        return molecule, atom_map

    return atom_map
Example #8
0
    def __init__(self,
                 param,
                 frags,
                 stream=None,
                 param_to_opt=None,
                 rj=False,
                 init_random=True,
                 tau='mult'):
        """
        Build the pymc nodes of the torsion model and store them in `self.pymc_parameters`.

        Parameters
        ----------
        param : Parmed CharmmParameterSet
        frags : list of torsionfit.QMDataBase
            A single object is also accepted and is wrapped in a list.
        stream : str
            Path to CHARMM stream file. Default None. If None, param_to_opt list must be given. When a stream file is
            specified, param_to_opt is generated if the penalty of the parameters are greater than a threshold.
        param_to_opt : list of tuples of torsions.
            Default None. When falsy, the list is obtained from `TorsionScan.to_optimize(param, stream)`.
        rj : bool
            If True, will use reversible jump to sample Fourier terms. If False, will sample all Ks. Default False
        init_random: bool
            Randomize starting condition. Default is True. If false, will resort to the initial values given when
            the pymc nodes are created. The per-torsion `log_sigma_k_*` nodes and the global `log_sigma` node are
            never randomized.
        tau: string.
            options are 'mult' or 'single'. When 'mult', every element in K_m will have its own 'tau', when 'single',
            each K_m will have one tau.
            Default 'mult'

        Raises
        ------
        Exception
            If `tau` is neither 'mult' nor 'single'.
        """

        if not isinstance(frags, list):
            frags = [frags]

        self.pymc_parameters = dict()
        self.frags = frags
        self.rj = rj
        if param_to_opt:
            self.parameters_to_optimize = param_to_opt
        else:
            self.parameters_to_optimize = TorsionScan.to_optimize(
                param, stream)

        multiplicity_bitstrings = dict()

        # offset: one uniform stochastic per fragment, named after the fragment's first residue
        for frag in self.frags:
            name = '%s_offset' % frag.topology._residues[0]
            offset = pymc.Uniform(name, lower=-50, upper=50, value=0)
            self.pymc_parameters[name] = offset

        # Initial value of log(sigma_k): a vector of six entries ('mult') or a single scalar ('single')
        if tau == 'mult':
            value = np.log(np.ones(6) * 0.01)
        elif tau == 'single':
            value = np.log(0.01)
        else:
            raise Exception(
                "Only 'mult' and 'single' are allowed options for tau")

        for p in self.parameters_to_optimize:
            torsion_name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3]
            log_sigma_k_name = 'log_sigma_k_{}'.format(torsion_name)
            sigma_k_name = 'sigma_k_{}'.format(torsion_name)
            precision_k_name = 'precision_k_{}'.format(torsion_name)
            k_name = '{}_K'.format(torsion_name)

            # lower and upper for this distribution are based on empirical data that below this amount the prior is too
            # biased and above the moves are usually rejected.
            log_sigma_k_node = pymc.Uniform(log_sigma_k_name,
                                            lower=-4.6052,
                                            upper=3.453,
                                            value=value)
            self.pymc_parameters[log_sigma_k_name] = log_sigma_k_node
            # The node is bound as a lambda default argument so that pymc picks it up as the parent
            self.pymc_parameters[sigma_k_name] = pymc.Lambda(
                sigma_k_name,
                lambda log_sigma_k=log_sigma_k_node: np.exp(log_sigma_k))
            self.pymc_parameters[precision_k_name] = pymc.Lambda(
                precision_k_name,
                lambda log_sigma_k=log_sigma_k_node: np.exp(-2 * log_sigma_k))

            # Six force constants per torsion, one per entry of the multiplicity vector n = 1..6
            self.pymc_parameters[k_name] = pymc.Normal(
                k_name,
                value=np.zeros(6),
                mu=0,
                tau=self.pymc_parameters[precision_k_name])

            if torsion_name not in multiplicity_bitstrings:
                multiplicity_bitstrings[torsion_name] = 0

        if self.rj:
            # One discrete index in [0, 63] per torsion; it selects an on/off mask from self.models
            for torsion_name in multiplicity_bitstrings:
                name = torsion_name + '_multiplicity_bitstring'
                bitstring = pymc.DiscreteUniform(
                    name,
                    lower=0,
                    upper=63,
                    value=multiplicity_bitstrings[torsion_name])
                self.pymc_parameters[name] = bitstring

        if init_random:
            # randomize initial value of every node created so far, except the Lambda deterministics and the
            # log_sigma_k hyperparameters
            for parameter in self.pymc_parameters:
                node = self.pymc_parameters[parameter]
                if type(node) != pymc.CommonDeterministics.Lambda and parameter[:11] != 'log_sigma_k':
                    node.random()
                    logger().info('initial value for {} is {}'.format(
                        parameter, node.value))

        # Created after the randomization step, so these always start from their given initial value
        self.pymc_parameters['log_sigma'] = pymc.Uniform('log_sigma',
                                                         lower=-10,
                                                         upper=3,
                                                         value=np.log(0.01))
        self.pymc_parameters['sigma'] = pymc.Lambda(
            'sigma',
            lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                log_sigma))
        self.pymc_parameters['precision'] = pymc.Lambda(
            'precision',
            lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                -2 * log_sigma))

        # Precalculate phis
        n = np.array([1., 2., 3., 4., 5., 6.])
        # All 64 on/off combinations of the six multiplicities, indexed by the multiplicity bitstring value
        self.models = list(itertools.product((0, 1), repeat=6))

        inner_sum = []
        for i, frag in enumerate(frags):
            inner_sum.append(OrderedDict())
            for t in frag.phis:
                inner_sum[i][t] = (1 + np.cos(
                    frag.phis[t][:, np.newaxis] * n[:, np.newaxis])).sum(-1)
        self.inner_sum = inner_sum

        @pymc.deterministic
        def torsion_energy(pymc_parameters=self.pymc_parameters):
            mm = np.ndarray(0)

            for i, mol in enumerate(self.frags):
                Fourier_sum = np.zeros((mol.n_frames))
                for t in inner_sum[i]:
                    name = t[0] + '_' + t[1] + '_' + t[2] + '_' + t[3]
                    if self.rj:
                        # Mask the force constants with the currently selected on/off model
                        K = pymc_parameters['{}_K'.format(name)] * self.models[
                            pymc_parameters['{}_multiplicity_bitstring'.format(
                                name)]]
                    else:
                        K = pymc_parameters['{}_K'.format(name)]
                    Fourier_sum += (K * inner_sum[i][t]).sum(1)
                # Energy relative to the fragment's minimum, shifted by the fragment's sampled offset
                Fourier_sum_rel = Fourier_sum - Fourier_sum.min()
                Fourier_sum_rel += pymc_parameters['{}_offset'.format(
                    mol.topology._residues[0])]
                # Append the relative, offset-shifted energies; appending the raw Fourier_sum would discard
                # the two lines above and leave the offset stochastics without any effect on the model.
                mm = np.append(mm, Fourier_sum_rel)
            return mm

        size = sum([len(i.qm_energy) for i in self.frags])
        # Observed data: the fragments' delta_energy arrays concatenated in fragment order
        residual_energy = np.ndarray(0)
        for i in range(len(frags)):
            residual_energy = np.append(residual_energy, frags[i].delta_energy)

        self.pymc_parameters['torsion_energy'] = torsion_energy
        self.pymc_parameters['qm_fit'] = pymc.Normal(
            'qm_fit',
            mu=self.pymc_parameters['torsion_energy'],
            tau=self.pymc_parameters['precision'],
            size=size,
            observed=True,
            value=residual_energy)
Example #9
0
def update_param_from_sample(param_list, param, db=None, model=None, i=-1, rj=False, phase=False, n_5=True, continuous=False,
                             model_type='numpy'):
    """
    This function parameterizes sampled torsion with values of sample i in database or current value in pymc model.
    The modifications are in place.

    parameters:
    -----------
     param_list: list
      list of tuples of torsions being sampled [(A, B, C, D), (E, F, G, H)]. A single tuple is also accepted.
     param: parmed.charmm.parameterset
     db: sqlit_plus database or pymc sampler
        default is None
     model: pymc model
        default is None
     i: int, sample to use
        default is -1
     rj: flag if reversible jump is on.
         Default False
     phase: bool
        Flag if phases were sampled. Default is False
     n_5: bool
        Flag if multiplicity of 5 was sampled and also needs to be modified. Default is True.
     continuous: bool
        Flag if phases were sampled continuously. When False, a sampled phase of 1 is written as 180.0.
        Default is False
     model_type: string
        which torsionfit model was used, 'numpy' or 'openmm'. Default is 'numpy'

    At least one of `db` and `model` must be given. When both are given, force constants are read from `db` while the
    multiplicity bitstring and the phases are read from `model`.

    raises:
    -------
     ValueError
        if `model_type` is not 'numpy' or 'openmm', or if both `db` and `model` are None
    """
    # Validate up front: previously an unknown model_type only warned and then failed on an unbound name, and a
    # missing db/model either failed the same way or silently wrote None into the parameter set.
    if model_type not in ('numpy', 'openmm'):
        raise ValueError('Only numpy and openmm model_types are allowed, got {!r}'.format(model_type))
    if db is None and model is None:
        raise ValueError('Either db or model must be provided')

    logger().debug('updating parameters')
    if not isinstance(param_list, list):
        param_list = [param_list]
    for t in param_list:
        torsion_name = t[0] + '_' + t[1] + '_' + t[2] + '_' + t[3]
        if rj:
            multiplicity_key = torsion_name + '_multiplicity_bitstring'
            if db is not None:
                multiplicity_bitstring = int(db.trace(multiplicity_key)[i])
            if model is not None:
                multiplicity_bitstring = model.pymc_parameters[multiplicity_key].value
        else:
            # All six multiplicities on. The value is not consulted when rj is False (see the test below).
            multiplicity_bitstring = 63
        reverse_t = tuple(reversed(t))
        for n in range(len(param.dihedral_types[t])):
            m = int(param.dihedral_types[t][n].per)
            logger().debug('Working on {}'.format(m))
            multiplicity_bitmask = 2 ** (m - 1)  # multiplicity bitmask
            if (multiplicity_bitstring & multiplicity_bitmask) or not rj:
                if m == 5 and not n_5:
                    continue
                # numpy models keep one vector of Ks per torsion, openmm models keep one scalar K per multiplicity
                if model_type == 'numpy':
                    k = torsion_name + '_K'
                else:
                    k = torsion_name + '_' + str(m) + '_K'

                if db is not None:
                    raw = db.trace(k)[i]
                else:
                    raw = model.pymc_parameters[k].value
                if model_type == 'numpy':
                    # presumably converts kJ/mol to kcal/mol -- TODO confirm
                    sample = raw[m-1]/4.184
                else:
                    sample = raw

                logger().debug('K sample value {}'.format(sample))
                param.dihedral_types[t][n].phi_k = sample
                param.dihedral_types[reverse_t][n].phi_k = sample
                if phase:
                    p = torsion_name + '_' + str(m) + '_Phase'
                    if db is not None:
                        sample = db.trace(p)[i]
                    if model is not None:
                        sample = model.pymc_parameters[p].value
                    if not continuous:
                        logger().debug('Not continuous')
                        # Discrete phases are sampled as 0/1; 1 stands for 180 degrees
                        if sample == 1:
                            sample = 180.0
                    logger().debug('Phase sample value {}'.format(sample))
                    param.dihedral_types[t][n].phase = sample
                    param.dihedral_types[reverse_t][n].phase = sample
            else:
                # This torsion periodicity is disabled.
                logger().debug('Turning off {}'.format(m))
                param.dihedral_types[t][n].phi_k = 0
                param.dihedral_types[reverse_t][n].phi_k = 0
Example #10
0
    def __init__(self, param, frags, stream=None,  platform=None, param_to_opt=None, rj=False, sample_n5=False,
                 continuous_phase=False, sample_phase=False, init_random=True):
        """
        Build the pymc nodes of the torsion model and store them in `self.pymc_parameters`.

        Parameters
        ----------
        param : Parmed CharmmParameterSet
            Modified in place: phases may be zeroed and missing multiplicity terms are added.
        frags : list of torsionfit.QMDataBase
            A single object is also accepted and is wrapped in a list.
        stream : str
            Path to CHARMM stream file. Default None.
        platform : openmm.Platform
            Default None.
        param_to_opt : list of tuples of torsions.
            Default None. When falsy, the list is obtained from `TorsionScan.to_optimize(param, stream)`.
        rj : bool
            If True, will use reversible jump to sample Fourier terms. If False, will sample all Ks. Default False
        sample_n5 : bool
            If True, will also sample n=5. Default False
        continuous_phase : bool
            If True, will allow phases to take on any value between 0-180. If False, phase will be a discrete and only
            sample 0 or 180. Forced to False (with a warning) when sample_phase is False.
        sample_phase : bool
            If True, a phase is sampled for every multiplicity and Ks are drawn from a uniform prior on [0, 20].
            If False, phases are set to 0 and Ks are drawn from a normal prior, so they can take on negative values.
            Default False
        init_random: bool
            Randomize starting condition. Default is True. If false, will resort to whatever value is in the parameter set.
        """

        if not isinstance(frags, list):
            frags = [frags]

        self.pymc_parameters = dict()
        self.frags = frags
        self.platform = platform
        self.rj = rj
        self.sample_n5 = sample_n5
        self.continuous_phase = continuous_phase
        self.sample_phase = sample_phase
        if param_to_opt:
            self.parameters_to_optimize = param_to_opt
        else:
            self.parameters_to_optimize = TorsionScan.to_optimize(param, stream)

        # Check that options are reasonable
        if not sample_phase and continuous_phase:
            warnings.warn("You can't eliminate phase but have continuous phase. Changing continuous phase to False")
            self.continuous_phase = False

        # set all phases to 0 if phases are not sampled
        if not self.sample_phase:
            par.set_phase_0(self.parameters_to_optimize, param)

        multiplicities = [1, 2, 3, 4, 6]
        if self.sample_n5:
            multiplicities = [1, 2, 3, 4, 5, 6]
        multiplicity_bitstrings = dict()

        # offset: one uniform stochastic per fragment, named after the fragment's first residue
        for frag in self.frags:
            name = '%s_offset' % frag.topology._residues[0]
            offset = pymc.Uniform(name, lower=-50, upper=50, value=0)
            self.pymc_parameters[name] = offset

        for p in self.parameters_to_optimize:
            torsion_name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3]
            log_sigma_k_name = 'log_sigma_k_{}'.format(torsion_name)
            sigma_k_name = 'sigma_k_{}'.format(torsion_name)
            precision_k_name = 'precision_k_{}'.format(torsion_name)

            # Per-torsion hyperparameter on the width of the Gaussian prior on K
            log_sigma_k_node = pymc.Uniform(log_sigma_k_name, lower=-4.6052, upper=3.453, value=np.log(0.01))
            self.pymc_parameters[log_sigma_k_name] = log_sigma_k_node
            # The node is bound as a lambda default argument so that pymc picks it up as the parent
            self.pymc_parameters[sigma_k_name] = pymc.Lambda(
                sigma_k_name, lambda log_sigma_k=log_sigma_k_node: np.exp(log_sigma_k))
            self.pymc_parameters[precision_k_name] = pymc.Lambda(
                precision_k_name, lambda log_sigma_k=log_sigma_k_node: np.exp(-2 * log_sigma_k))

            if torsion_name not in multiplicity_bitstrings:
                multiplicity_bitstrings[torsion_name] = 0

            for m in multiplicities:
                # First term of this torsion that already has periodicity m in the parameter set, if any
                existing = None
                for dihedral in param.dihedral_types[p]:
                    if dihedral.per == m:
                        existing = dihedral
                        break
                if existing is not None:
                    # Mark this multiplicity as initially switched on
                    multiplicity_bitstrings[torsion_name] += 2 ** (m - 1)

                # Start from the existing force constant when there is one, from 0 otherwise. The node is
                # created once instead of creating a default node and then replacing it.
                name = torsion_name + '_' + str(m) + '_K'
                k_value = existing.phi_k if existing is not None else 0
                if not self.sample_phase:
                    k = pymc.Normal(name, mu=0, tau=self.pymc_parameters[precision_k_name], value=k_value)
                else:
                    k = pymc.Uniform(name, lower=0, upper=20, value=k_value)
                self.pymc_parameters[name] = k

                if self.sample_phase:
                    # The initial phase comes from the matching existing term only. Previously every
                    # non-matching term in the list reset a matched continuous phase to 0, and a list without
                    # a usable entry left `phase` unbound or carried over from the previous multiplicity.
                    name = torsion_name + '_' + str(m) + '_Phase'
                    if self.continuous_phase:
                        phase_value = existing.phase if existing is not None else 0
                        phase = pymc.Uniform(name, lower=0, upper=180.0, value=phase_value)
                    else:
                        # Discrete phases are encoded as 0 -> 0 degrees, 1 -> 180 degrees
                        phase_value = 1 if existing is not None and existing.phase == 180.0 else 0
                        phase = pymc.DiscreteUniform(name, lower=0, upper=1, value=phase_value)

                    self.pymc_parameters[name] = phase

        if self.rj:
            # One discrete index in [0, 63] per torsion encoding which multiplicities are switched on
            for torsion_name in multiplicity_bitstrings:
                name = torsion_name + '_multiplicity_bitstring'
                bitstring = pymc.DiscreteUniform(name, lower=0, upper=63, value=multiplicity_bitstrings[torsion_name])
                self.pymc_parameters[name] = bitstring

        if init_random:
            # randomize initial value of every node created so far, except the Lambda deterministics
            for parameter in self.pymc_parameters:
                node = self.pymc_parameters[parameter]
                if type(node) != pymc.CommonDeterministics.Lambda:
                    node.random()
                    logger().info('initial value for {} is {}'.format(parameter, node.value))

        # Created after the randomization step, so these always start from their given initial value
        self.pymc_parameters['log_sigma'] = pymc.Uniform('log_sigma', lower=-10, upper=3, value=np.log(0.01))
        self.pymc_parameters['sigma'] = pymc.Lambda('sigma',
                                                    lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                                                        log_sigma))
        self.pymc_parameters['precision'] = pymc.Lambda('precision',
                                                        lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                                                            -2 * log_sigma))

        # add missing multiplicity terms to parameterSet so that the system has the same number of parameters
        par.add_missing(self.parameters_to_optimize, param, sample_n5=self.sample_n5)

        @pymc.deterministic
        def mm_energy(pymc_parameters=self.pymc_parameters, param=param):
            mm = np.ndarray(0)
            # Push the current sampled values into the parameter set before recomputing energies
            par.update_param_from_sample(self.parameters_to_optimize, param, model=self, rj=self.rj,
                                         phase=self.sample_phase, n_5=self.sample_n5, continuous=self.continuous_phase,
                                         model_type='openmm')
            for mol in self.frags:
                mol.compute_energy(param, offset=self.pymc_parameters['%s_offset' % mol.topology._residues[0]],
                                   platform=self.platform)
                mm = np.append(mm, mol.mm_energy / kilojoules_per_mole)
            return mm

        size = sum([len(i.qm_energy) for i in self.frags])
        # Observed data: the fragments' qm_energy arrays concatenated in fragment order
        qm_energy = np.ndarray(0)
        for i in range(len(frags)):
            qm_energy = np.append(qm_energy, frags[i].qm_energy)
        self.pymc_parameters['mm_energy'] = mm_energy
        self.pymc_parameters['qm_fit'] = pymc.Normal('qm_fit', mu=self.pymc_parameters['mm_energy'],
                                                     tau=self.pymc_parameters['precision'], size=size, observed=True,
                                                     value=qm_energy)
Example #11
0
def generate_torsions(mol, path, interval):
    """
    This function takes a 3D molecule file (pdb, mol2 or sd file) and generates structures for a torsion drive on all
    torsions in the molecule. This function uses OpenEye.

    Only one torsion is kept per central bond: the one whose two end atoms have the largest summed atomic number.
    For every kept torsion a directory `path/<a>_<b>_<c>_<d>` (1-based atom indices) is created, with one
    sub-directory per angle that holds a PDB file of the molecule with the torsion set to that angle.

    Parameters
    ----------
    mol : str
        path to molecule file (pdb, mol2, sd, etc.)
    path: str
        path to output files
    interval: int
        angle (in degrees) of interval for torsion drive

    """
    filename = mol.split('/')[-1].split('.')[0]
    ifs = oechem.oemolistream(mol)
    inp_mol = oechem.OEMol()
    oechem.OEReadMolecule(ifs, inp_mol)
    ifs.close()

    mid_tors = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(inp_mol)]

    # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
    smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        warnings.warn('OEParseSmarts failed')
    ss = oechem.OESubSearch(qmol)
    oemol = oechem.OEMol(inp_mol)
    oechem.OEPrepareSearch(oemol, ss)
    unique = True
    h_tors = [[ma.target for ma in match.GetAtoms()] for match in ss.Match(oemol, unique)]

    # Combine middle and terminal torsions
    all_tors = mid_tors + h_tors
    if not all_tors:
        # Nothing to drive; previously this case failed with an IndexError further down.
        warnings.warn('No torsions found in {}'.format(mol))
        return

    # Sort all_tors so that it's grouped by central bond. Both central atoms are used as sort keys (third atom is
    # the primary key, second atom the secondary key) so that torsions sharing only one central atom cannot be
    # interleaved, which would make the same bond show up more than once below.
    central_bonds = np.array([[tor[1].GetIdx(), tor[2].GetIdx()] for tor in all_tors], dtype=int)
    order = np.lexsort((central_bonds[:, 0], central_bonds[:, 1]))
    sorted_tors = [all_tors[i] for i in order]

    # Keep only one torsion per rotatable bond
    tors = []
    best_tor = None
    best_tor_order = None
    for tor in sorted_tors:
        logger().info("Idxs: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom Numbers: {} {} {} {}".format(tor[0].GetAtomicNum(), tor[1].GetAtomicNum(), tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))
        tor_order = tor[0].GetAtomicNum() + tor[3].GetAtomicNum()
        same_bond = (best_tor is not None
                     and tor[1].GetIdx() == best_tor[1].GetIdx()
                     and tor[2].GetIdx() == best_tor[2].GetIdx())
        if not same_bond:
            if best_tor is not None:
                # The previous central bond is finished: store its best torsion.
                logger().info("Adding to list: {} {} {} {}".format(best_tor[0].GetIdx(), best_tor[1].GetIdx(), best_tor[2].GetIdx(), best_tor[3].GetIdx()))
                tors.append(best_tor)
            best_tor = tor
            best_tor_order = tor_order
            logger().info("new_tor with central bond across atoms: {} {}".format(tor[1].GetIdx(), tor[2].GetIdx()))
        else:
            logger().info("Not a new_tor but now with end atoms: {} {}".format(tor[0].GetIdx(), tor[3].GetIdx()))
            if tor_order > best_tor_order:
                best_tor = tor
                best_tor_order = tor_order
    logger().info("Adding to list: {} {} {} {}".format(best_tor[0].GetIdx(), best_tor[1].GetIdx(), best_tor[2].GetIdx(), best_tor[3].GetIdx()))
    tors.append(best_tor)

    logger().info("List of torsion to drive:")
    for tor in tors:
        logger().info("Idx: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom numbers: {} {} {} {}".format(tor[0].GetAtomicNum(), tor[1].GetAtomicNum(), tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))

    # Save the coordinates of the first conformer; every driven structure starts from them.
    conf = oemol.GetConfs().next()
    coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
    conf.GetCoords(coords)
    oemol.DeleteConfs()

    for tor in tors:
        # Directory and file names use 1-based atom indices.
        tor_name = '_'.join(str(atom.GetIdx() + 1) for atom in tor)
        folder = os.path.join(path, tor_name)
        try:
            os.makedirs(folder)
        except FileExistsError:
            logger().info("Overwriting existing directory {}".format(tor_name))
        for angle in range(0, 360, interval):
            angle_folder = os.path.join(folder, str(angle))
            try:
                os.makedirs(angle_folder)
            except FileExistsError:
                # Re-running into an existing output tree overwrites the files instead of crashing.
                logger().info("Overwriting existing directory {}".format(tor_name))
            newconf = oemol.NewConf(coords)
            oechem.OESetTorsion(newconf, tor[0], tor[1], tor[2], tor[3], radians(angle))
            pdb = oechem.oemolostream('{}/{}_{}_{}.pdb'.format(angle_folder, filename, tor_name, angle))
            oechem.OEWritePDBFile(pdb, newconf)
            # Close explicitly so the file is flushed and the handle released right away.
            pdb.close()
Example #12
0
def generate_torsions(inp_mol, output_path, interval, base_name=None, tar=True):
    """
    This function takes a 3D molecule and generates structures for a torsion drive on all torsions in the molecule.
    This function uses OpenEye.

    Only one torsion is kept per central bond: the one whose two end atoms have the largest summed atomic number.
    For every kept torsion a directory `output_path/<a>_<b>_<c>_<d>` (1-based atom indices) is created, with one
    sub-directory per angle that holds a PDB file of the molecule with the torsion set to that angle.

    Parameters
    ----------
    inp_mol : OEMol
        molecule to generate 1D torsion scans
    output_path: str
        path to output file directory
    interval: int
        angle (in degrees) of interval for torsion drive
    base_name: str
        base name for file. Default is None. If default, use title in OEMol for base name
    tar: bool
        If true, will compress output. The archive is written to `<output_path>.tar.gz` and contains the directory
        named `base_name` that sits next to `output_path` (i.e. `output_path` itself when its last component is
        `base_name`).

    """
    if not base_name:
        base_name = inp_mol.GetTitle()

    mid_tors = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(inp_mol)]

    # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
    smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        warnings.warn('OEParseSmarts failed')
    ss = oechem.OESubSearch(qmol)
    mol = oechem.OEMol(inp_mol)
    oechem.OEPrepareSearch(mol, ss)
    unique = True
    h_tors = [[ma.target for ma in match.GetAtoms()] for match in ss.Match(mol, unique)]

    # Combine middle and terminal torsions
    all_tors = mid_tors + h_tors
    if not all_tors:
        # Nothing to drive; previously this case failed with an IndexError further down.
        warnings.warn('No torsions found in {}'.format(base_name))
        return

    # Sort all_tors so that it's grouped by central bond. Both central atoms are used as sort keys (third atom is
    # the primary key, second atom the secondary key) so that torsions sharing only one central atom cannot be
    # interleaved, which would make the same bond show up more than once below.
    central_bonds = np.array([[tor[1].GetIdx(), tor[2].GetIdx()] for tor in all_tors], dtype=int)
    order = np.lexsort((central_bonds[:, 0], central_bonds[:, 1]))
    sorted_tors = [all_tors[i] for i in order]

    # Keep only one torsion per rotatable bond
    tors = []
    best_tor = None
    best_tor_order = None
    for tor in sorted_tors:
        logger().info("Idxs: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom Numbers: {} {} {} {}".format(tor[0].GetAtomicNum(), tor[1].GetAtomicNum(), tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))
        tor_order = tor[0].GetAtomicNum() + tor[3].GetAtomicNum()
        same_bond = (best_tor is not None
                     and tor[1].GetIdx() == best_tor[1].GetIdx()
                     and tor[2].GetIdx() == best_tor[2].GetIdx())
        if not same_bond:
            if best_tor is not None:
                # The previous central bond is finished: store its best torsion.
                logger().info("Adding to list: {} {} {} {}".format(best_tor[0].GetIdx(), best_tor[1].GetIdx(), best_tor[2].GetIdx(), best_tor[3].GetIdx()))
                tors.append(best_tor)
            best_tor = tor
            best_tor_order = tor_order
            logger().info("new_tor with central bond across atoms: {} {}".format(tor[1].GetIdx(), tor[2].GetIdx()))
        else:
            logger().info("Not a new_tor but now with end atoms: {} {}".format(tor[0].GetIdx(), tor[3].GetIdx()))
            if tor_order > best_tor_order:
                best_tor = tor
                best_tor_order = tor_order
    logger().info("Adding to list: {} {} {} {}".format(best_tor[0].GetIdx(), best_tor[1].GetIdx(), best_tor[2].GetIdx(), best_tor[3].GetIdx()))
    tors.append(best_tor)

    logger().info("List of torsion to drive:")
    for tor in tors:
        logger().info("Idx: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom numbers: {} {} {} {}".format(tor[0].GetAtomicNum(), tor[1].GetAtomicNum(), tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))

    # Save the coordinates of the first conformer; every driven structure starts from them.
    conf = mol.GetConfs().next()
    coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
    conf.GetCoords(coords)
    # Check if coordinates are not zero
    if all(coords[i] == 0 for i in range(len(coords))):
        # Generate new coordinates.
        mol2 = generate_conformers(mol, max_confs=1)
        conf = mol2.GetConfs().next()
        coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
        conf.GetCoords(coords)
        mol2.DeleteConfs()
    mol.DeleteConfs()

    for tor in tors:
        # Directory and file names use 1-based atom indices.
        tor_name = '_'.join(str(atom.GetIdx() + 1) for atom in tor)
        folder = os.path.join(output_path, tor_name)
        try:
            os.makedirs(folder)
        except FileExistsError:
            logger().info("Overwriting existing directory {}".format(tor_name))
        for angle in range(0, 360, interval):
            angle_folder = os.path.join(folder, str(angle))
            try:
                os.mkdir(angle_folder)
            except FileExistsError:
                logger().info("Overwriting existing directory {}".format(tor_name))
            newconf = mol.NewConf(coords)
            oechem.OESetTorsion(newconf, tor[0], tor[1], tor[2], tor[3], radians(angle))
            pdb = oechem.oemolostream('{}/{}_{}_{}.pdb'.format(angle_folder, base_name, tor_name, angle))
            oechem.OEWritePDBFile(pdb, newconf)
            # Close explicitly so every file is flushed to disk before the archive below is built.
            pdb.close()
    if tar:
        # tar archive output. The directory is addressed by an explicit path (with the same archive member names
        # as before) instead of changing the working directory of the whole process.
        with tarfile.open('{}.tar.gz'.format(output_path), mode='w:gz') as out:
            out.add(os.path.join(output_path, os.pardir, base_name), arcname=base_name)
Example #13
0
def update_param_from_sample(param_list,
                             param,
                             db=None,
                             model=None,
                             i=-1,
                             rj=False,
                             phase=False,
                             n_5=True,
                             continuous=False,
                             model_type='numpy'):
    """
    This function parameterizes sampled torsion with values of sample i in database or current value in pymc model.
    The modifications are in place.

    parameters:
    -----------
     param_list: list
      list of tuples of torsions being sampled [(A, B, C, D), (E, F, G, H)]. A single tuple is also accepted.
     param: parmed.charmm.parameterset
     db: sqlit_plus database or pymc sampler
        default is None
     model: pymc model
        default is None
     i: int, sample to use
        default is -1
     rj: flag if reversible jump is on.
         Default False
     phase: bool
        Flag if phases were sampled. Default is False
     n_5: bool
        Flag if multiplicity of 5 was sampled and also needs to be modified. Default is True.
     continuous: bool
        Flag if phases were sampled continuously. If False, a sampled phase value of 1 is stored as 180.0.
        Default is False
     model_type: string
        which torsionfit model was used, 'numpy' or 'openmm'. With 'numpy' the force constants are read from
        '<torsion>_K' (indexed by multiplicity and divided by 4.184), with 'openmm' from '<torsion>_<m>_K'.

    raises:
    -------
     ValueError
        if model_type is not 'numpy' or 'openmm', or if neither db nor model is given
    """
    logger().debug('updating parameters')
    # Fail early: carrying on without a valid source would write None into the parameter set.
    if model_type not in ('numpy', 'openmm'):
        raise ValueError('Only numpy and openmm model_types are allowed')
    if db is None and model is None:
        raise ValueError('Either db or model is needed to read the sampled values')
    if not isinstance(param_list, list):
        param_list = [param_list]

    for t in param_list:
        torsion_name = t[0] + '_' + t[1] + '_' + t[2] + '_' + t[3]
        multiplicity_bitstring = None
        if rj:
            multiplicity_key = torsion_name + '_multiplicity_bitstring'
            if db is not None:
                multiplicity_bitstring = int(db.trace(multiplicity_key)[i])
            if model is not None:
                multiplicity_bitstring = model.pymc_parameters[
                    multiplicity_key].value
        reverse_t = tuple(reversed(t))
        for n, term in enumerate(param.dihedral_types[t]):
            m = int(term.per)
            logger().debug('Working on {}'.format(m))
            # Bit (m - 1) of the bitstring tells whether multiplicity m is switched on.
            if rj and not (multiplicity_bitstring & 2**(m - 1)):
                # This torsion periodicity is disabled.
                logger().debug('Turning off {}'.format(m))
                term.phi_k = 0
                param.dihedral_types[reverse_t][n].phi_k = 0
                continue
            if m == 5 and not n_5:
                continue

            if model_type == 'numpy':
                k = torsion_name + '_K'
            else:
                k = torsion_name + '_' + str(m) + '_K'

            # For the force constant the database takes precedence when both sources are given.
            if db is not None:
                sample = db.trace(k)[i]
            else:
                sample = model.pymc_parameters[k].value
            if model_type == 'numpy':
                sample = sample[m - 1] / 4.184

            logger().debug('K sample value {}'.format(sample))
            term.phi_k = sample
            param.dihedral_types[reverse_t][n].phi_k = sample
            if phase:
                p = torsion_name + '_' + str(m) + '_Phase'
                # NOTE: unlike the force constant, the model value wins here when both sources are given.
                if db is not None:
                    sample = db.trace(p)[i]
                if model is not None:
                    sample = model.pymc_parameters[p].value
                if not continuous:
                    logger().debug('Not continuous')
                    if sample == 1:
                        sample = 180.0
                logger().debug('Phase sample value {}'.format(sample))
                term.phase = sample
                param.dihedral_types[reverse_t][n].phase = sample
Example #14
0
def generate_torsions(inp_mol,
                      output_path,
                      interval,
                      base_name=None,
                      tar=True):
    """
    This function takes a 3D molecule and generates structures for a torsion drive on all torsions in the molecule.
    This function uses OpenEye.

    Only one torsion is kept per central bond: the one whose two end atoms have the largest summed atomic number.
    For every kept torsion a directory `output_path/<a>_<b>_<c>_<d>` (1-based atom indices) is created, with one
    sub-directory per angle that holds a PDB file of the molecule with the torsion set to that angle.

    Parameters
    ----------
    inp_mol : OEMol
        molecule to generate 1D torsion scans
    output_path: str
        path to output file directory
    interval: int
        angle (in degrees) of interval for torsion drive
    base_name: str
        base name for file. Default is None. If default, use title in OEMol for base name
    tar: bool
        If true, will compress output. The archive is written to `<output_path>.tar.gz` and contains the directory
        named `base_name` that sits next to `output_path` (i.e. `output_path` itself when its last component is
        `base_name`).

    """
    if not base_name:
        base_name = inp_mol.GetTitle()

    mid_tors = [[tor.a, tor.b, tor.c, tor.d]
                for tor in oechem.OEGetTorsions(inp_mol)]

    # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
    smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        warnings.warn('OEParseSmarts failed')
    ss = oechem.OESubSearch(qmol)
    mol = oechem.OEMol(inp_mol)
    oechem.OEPrepareSearch(mol, ss)
    unique = True
    h_tors = [[ma.target for ma in match.GetAtoms()]
              for match in ss.Match(mol, unique)]

    # Combine middle and terminal torsions
    all_tors = mid_tors + h_tors
    if not all_tors:
        # Nothing to drive; previously this case failed with an IndexError further down.
        warnings.warn('No torsions found in {}'.format(base_name))
        return

    # Sort all_tors so that it's grouped by central bond. Both central atoms are used as sort keys (third atom is
    # the primary key, second atom the secondary key) so that torsions sharing only one central atom cannot be
    # interleaved, which would make the same bond show up more than once below.
    central_bonds = np.array(
        [[tor[1].GetIdx(), tor[2].GetIdx()] for tor in all_tors], dtype=int)
    order = np.lexsort((central_bonds[:, 0], central_bonds[:, 1]))
    sorted_tors = [all_tors[i] for i in order]

    # Keep only one torsion per rotatable bond
    tors = []
    best_tor = None
    best_tor_order = None
    for tor in sorted_tors:
        logger().info("Idxs: {} {} {} {}".format(tor[0].GetIdx(),
                                                 tor[1].GetIdx(),
                                                 tor[2].GetIdx(),
                                                 tor[3].GetIdx()))
        logger().info("Atom Numbers: {} {} {} {}".format(
            tor[0].GetAtomicNum(), tor[1].GetAtomicNum(),
            tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))
        tor_order = tor[0].GetAtomicNum() + tor[3].GetAtomicNum()
        same_bond = (best_tor is not None
                     and tor[1].GetIdx() == best_tor[1].GetIdx()
                     and tor[2].GetIdx() == best_tor[2].GetIdx())
        if not same_bond:
            if best_tor is not None:
                # The previous central bond is finished: store its best torsion.
                logger().info("Adding to list: {} {} {} {}".format(
                    best_tor[0].GetIdx(), best_tor[1].GetIdx(),
                    best_tor[2].GetIdx(), best_tor[3].GetIdx()))
                tors.append(best_tor)
            best_tor = tor
            best_tor_order = tor_order
            logger().info(
                "new_tor with central bond across atoms: {} {}".format(
                    tor[1].GetIdx(), tor[2].GetIdx()))
        else:
            logger().info("Not a new_tor but now with end atoms: {} {}".format(
                tor[0].GetIdx(), tor[3].GetIdx()))
            if tor_order > best_tor_order:
                best_tor = tor
                best_tor_order = tor_order
    logger().info("Adding to list: {} {} {} {}".format(best_tor[0].GetIdx(),
                                                       best_tor[1].GetIdx(),
                                                       best_tor[2].GetIdx(),
                                                       best_tor[3].GetIdx()))
    tors.append(best_tor)

    logger().info("List of torsion to drive:")
    for tor in tors:
        logger().info("Idx: {} {} {} {}".format(tor[0].GetIdx(),
                                                tor[1].GetIdx(),
                                                tor[2].GetIdx(),
                                                tor[3].GetIdx()))
        logger().info("Atom numbers: {} {} {} {}".format(
            tor[0].GetAtomicNum(), tor[1].GetAtomicNum(),
            tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))

    # Save the coordinates of the first conformer; every driven structure starts from them.
    conf = mol.GetConfs().next()
    coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
    conf.GetCoords(coords)
    # Check if coordinates are not zero
    if all(coords[i] == 0 for i in range(len(coords))):
        # Generate new coordinates.
        mol2 = generate_conformers(mol, max_confs=1)
        conf = mol2.GetConfs().next()
        coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
        conf.GetCoords(coords)
        mol2.DeleteConfs()
    mol.DeleteConfs()

    for tor in tors:
        # Directory and file names use 1-based atom indices.
        tor_name = '_'.join(str(atom.GetIdx() + 1) for atom in tor)
        folder = os.path.join(output_path, tor_name)
        try:
            os.makedirs(folder)
        except FileExistsError:
            logger().info("Overwriting existing directory {}".format(tor_name))
        for angle in range(0, 360, interval):
            angle_folder = os.path.join(folder, str(angle))
            try:
                os.mkdir(angle_folder)
            except FileExistsError:
                logger().info(
                    "Overwriting existing directory {}".format(tor_name))
            newconf = mol.NewConf(coords)
            oechem.OESetTorsion(newconf, tor[0], tor[1], tor[2], tor[3],
                                radians(angle))
            pdb = oechem.oemolostream('{}/{}_{}_{}.pdb'.format(
                angle_folder, base_name, tor_name, angle))
            oechem.OEWritePDBFile(pdb, newconf)
            # Close explicitly so every file is flushed to disk before the archive below is built.
            pdb.close()
    if tar:
        # tar archive output. The directory is addressed by an explicit path (with the same archive member names
        # as before) instead of changing the working directory of the whole process.
        with tarfile.open('{}.tar.gz'.format(output_path),
                          mode='w:gz') as out:
            out.add(os.path.join(output_path, os.pardir, base_name),
                    arcname=base_name)
Example #15
0
def generate_fragments(inputf,
                       output_dir,
                       pdf=False,
                       combinatorial=True,
                       MAX_ROTORS=2,
                       strict_stereo=True,
                       remove_map=True):
    """
    This function generates fragment SMILES files sorted by rotatable bonds from an input molecule file.
    The output .smi files are written out to `output_dir` and named `nrotor_n.smi` where n corresponds to the number
    of rotatable bonds for all fragments in the file.
    Parameters
    ----------
    inputf: str
        absolute path to input molecule file
    output_dir: str
        absolute path to output directory
    pdf: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    strict_stereo: bool
        passed through to `_generate_fragments`
    remove_map: bool
        If true, the map index of every atom is reset to 0 before fragmenting, which makes it easier to find
        duplicate fragments

    """
    ifs = oechem.oemolistream()
    smiles_unique = set()

    mol = oechem.OEMol()
    if not ifs.open(inputf):
        # Keep going (the outputs are still written, just empty) but do not fail silently.
        logger().warning('Unable to open {} for reading'.format(inputf))
    else:
        try:
            while oechem.OEReadMolecule(ifs, mol):
                openeye.normalize_molecule(mol)
                logger().info('fragmenting {}...'.format(mol.GetTitle()))
                if remove_map:
                    # Remove tags from smiles. This is done to make it easier to find duplicate fragments
                    for a in mol.GetAtoms():
                        a.SetMapIdx(0)
                frags = _generate_fragments(mol, strict_stereo=strict_stereo)
                if not frags:
                    logger().warning('Skipping {}, SMILES: {}'.format(
                        mol.GetTitle(), oechem.OECreateSmiString(mol)))
                    continue
                # The first element of the result is the charged molecule, the last one holds the fragments.
                charged = frags[0]
                frags = frags[-1]
                if combinatorial:
                    smiles = smiles_with_combined(frags,
                                                  charged,
                                                  MAX_ROTORS=MAX_ROTORS)
                else:
                    smiles = frag_to_smiles(frags, charged)

                smiles_unique.update(smiles.keys())
                if pdf:
                    oname = '{}.pdf'.format(mol.GetTitle())
                    ToPdf(charged, oname, frags)
                del charged, frags
        finally:
            # Release the input file even when fragmenting one of the molecules fails.
            ifs.close()

    # Generate oedatabase for all fragments
    # The output name is the input file name without directory and extension; a trailing '.gz' is dropped
    # together with the format extension in front of it (e.g. 'mols.oeb.gz' -> 'mols').
    base, ext = os.path.splitext(os.path.basename(inputf))
    if ext == '.gz':
        base = os.path.splitext(base)[0]
    ofname = base + '_frags'
    utils.to_smi(list(smiles_unique), output_dir, ofname)
    ofname_ext = ofname + '.smi'
    oedb_name = os.path.join(output_dir, ofname_ext)
    utils.create_oedatabase_idxfile(oedb_name)
    _sort_by_rotbond(oedb_name, outdir=output_dir)
Example #16
0
    def __init__(self, param, frags, stream=None,  param_to_opt=None, rj=False, init_random=True, tau='mult'):
        """
        Build the pymc nodes of the torsion model and store them in `self.pymc_parameters`.

        For every fragment an offset is created, for every torsion to optimize a vector of six force constants
        (`<torsion>_K`) with its own (log) sigma / precision nodes. The deterministic `torsion_energy` evaluates the
        Fourier series of all torsions for every frame, and `qm_fit` ties it to the observed `delta_energy` of the
        fragments.

        Parameters
        ----------
        param : Parmed CharmmParameterSet
        frags : list of torsionfit.QMDataBase
            A single fragment is also accepted and wrapped in a list.
        stream : str
            Path to CHARMM stream file. Default None. If None, param_to_opt list must be given. When a stream file is
            specified, param_to_opt is generated if the penalty of the parameters are greater than a threshold.
        param_to_opt : list of tuples of torsions.
            Default None.
        rj : bool
            If True, will use reversible jump to sample Fourier terms. If False, will sample all Ks. Default False
        init_random: bool
            Randomize starting condition. Default is True. If false, will resort to whatever value is in the parameter set.
            Default True
        tau: string.
            options are 'mult' or 'single'. When 'mult', every element in K_m will have its own 'tau', when 'single',
            each K_m will have one tau.
            Default 'mult'

        Raises
        ------
        ValueError
            If tau is neither 'mult' nor 'single'.

        """

        if not isinstance(frags, list):
            frags = [frags]

        self.pymc_parameters = dict()
        self.frags = frags
        self.rj = rj
        if param_to_opt:
            self.parameters_to_optimize = param_to_opt
        else:
            self.parameters_to_optimize = TorsionScan.to_optimize(param, stream)

        multiplicity_bitstrings = dict()

        # offset
        for frag in self.frags:
            name = '%s_offset' % frag.topology._residues[0]
            offset = pymc.Uniform(name, lower=-50, upper=50, value=0)
            self.pymc_parameters[name] = offset

        # Starting value of log(sigma_k): one entry per Fourier term for 'mult', a single scalar for 'single'.
        if tau == 'mult':
            value = np.log(np.ones(6)*0.01)
        elif tau == 'single':
            value = np.log(0.01)
        else:
            raise ValueError("Only 'mult' and 'single' are allowed options for tau")

        for p in self.parameters_to_optimize:
            torsion_name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3]
            log_sigma_name = 'log_sigma_k_{}'.format(torsion_name)
            sigma_name = 'sigma_k_{}'.format(torsion_name)
            precision_name = 'precision_k_{}'.format(torsion_name)
            k_name = '{}_K'.format(torsion_name)

            # lower and upper for this distribution are based on empirical data that below this amount the prior is too
            # biased and above the moves are usually rejected.
            self.pymc_parameters[log_sigma_name] = pymc.Uniform(log_sigma_name, lower=-4.6052, upper=3.453,
                                                                value=value)
            # The parent node is bound as a default argument so that each lambda keeps its own torsion's node.
            self.pymc_parameters[sigma_name] = pymc.Lambda(
                sigma_name,
                lambda log_sigma_k=self.pymc_parameters[log_sigma_name]: np.exp(log_sigma_k))
            self.pymc_parameters[precision_name] = pymc.Lambda(
                precision_name,
                lambda log_sigma_k=self.pymc_parameters[log_sigma_name]: np.exp(-2 * log_sigma_k))

            self.pymc_parameters[k_name] = pymc.Normal(k_name, value=np.zeros(6), mu=0,
                                                       tau=self.pymc_parameters[precision_name])

            multiplicity_bitstrings.setdefault(torsion_name, 0)

        if self.rj:
            for torsion_name in multiplicity_bitstrings:
                name = torsion_name + '_multiplicity_bitstring'
                bitstring = pymc.DiscreteUniform(name, lower=0, upper=63, value=multiplicity_bitstrings[torsion_name])
                self.pymc_parameters[name] = bitstring

        if init_random:
            # randomize initial value (everything created so far except the Lambda nodes and log_sigma_k)
            for parameter, node in self.pymc_parameters.items():
                if not isinstance(node, pymc.CommonDeterministics.Lambda) and not parameter.startswith('log_sigma_k'):
                    node.random()
                    logger().info('initial value for {} is {}'.format(parameter, node.value))

        self.pymc_parameters['log_sigma'] = pymc.Uniform('log_sigma', lower=-10, upper=3, value=np.log(0.01))
        self.pymc_parameters['sigma'] = pymc.Lambda('sigma',
                                                    lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                                                        log_sigma))
        self.pymc_parameters['precision'] = pymc.Lambda('precision',
                                                        lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                                                            -2 * log_sigma))

        # Precalculate phis
        n = np.array([1., 2., 3., 4., 5., 6.])
        # All 2**6 on/off combinations of the six Fourier terms; indexed by the multiplicity bitstring when rj is on.
        self.models = list(itertools.product((0, 1), repeat=6))

        inner_sum = []
        for i, frag in enumerate(frags):
            inner_sum.append(OrderedDict())
            for t in frag.phis:
                inner_sum[i][t] = (1 + np.cos(frag.phis[t][:, np.newaxis]*n[:, np.newaxis])).sum(-1)
        self.inner_sum = inner_sum

        @pymc.deterministic
        def torsion_energy(pymc_parameters=self.pymc_parameters):
            mm = np.ndarray(0)

            for i, mol in enumerate(self.frags):
                Fourier_sum = np.zeros((mol.n_frames))
                for t in inner_sum[i]:
                    name = t[0] + '_' + t[1] + '_' + t[2] + '_' + t[3]
                    if self.rj:
                        K = pymc_parameters['{}_K'.format(name)] * self.models[pymc_parameters['{}_multiplicity_bitstring'.format(name)]]
                    else:
                        K = pymc_parameters['{}_K'.format(name)]
                    Fourier_sum += (K*inner_sum[i][t]).sum(1)
                # Energies relative to the minimum of this fragment, shifted by the fragment's sampled offset.
                Fourier_sum_rel = Fourier_sum - min(Fourier_sum)
                Fourier_sum_rel += pymc_parameters['{}_offset'.format(mol.topology._residues[0])]
                # The relative, offset-shifted energies are what gets compared to the data; appending the raw
                # Fourier_sum (as was done before) left the offset parameters without any effect.
                mm = np.append(mm, Fourier_sum_rel)
            return mm

        size = sum([len(i.qm_energy) for i in self.frags])
        # Observed data: the delta_energy arrays of all fragments, concatenated in fragment order.
        residual_energy = np.ndarray(0)
        for frag in frags:
            residual_energy = np.append(residual_energy, frag.delta_energy)

        self.pymc_parameters['torsion_energy'] = torsion_energy
        self.pymc_parameters['qm_fit'] = pymc.Normal('qm_fit', mu=self.pymc_parameters['torsion_energy'],
                                                     tau=self.pymc_parameters['precision'], size=size, observed=True,
                                                     value=residual_energy)