Esempio n. 1
0
def proteinPrepare(mol_in,
                   pH=7.0,
                   verbose=0,
                   returnDetails=False,
                   hydrophobicThickness=None,
                   holdSelection=None):
    """A system preparation wizard for HTMD.

    Returns a Molecule object, where residues have been renamed to follow
    internal conventions on protonation (below). Coordinates are changed to
    optimize the H-bonding network. This should be roughly equivalent to mdweb and Maestro's
    preparation wizard.

    The following residue names are used in the returned molecule:

    === ===============================
    ASH Neutral ASP
    CYX SS-bonded CYS
    CYM Negative CYS
    GLH Neutral GLU
    HIP Positive HIS
    HID Neutral HIS, proton HD1 present
    HIE Neutral HIS, proton HE2 present
    LYN Neutral LYS
    TYM Negative TYR
    AR0 Neutral ARG
    === ===============================

    ========= ======= =========
    Charge +1 Neutral Charge -1
    ========= ======= =========
    -         ASH     ASP
    -         CYS     CYM
    -         GLH     GLU
    HIP       HID/HIE -
    LYS       LYN     -
    -         TYR     TYM
    ARG       AR0     -
    ========= ======= =========

    A detailed table about the residues modified is returned (as a second return value) when
    returnDetails is True (see PreparationData object).

    If hydrophobicThickness is set to a positive value 2*h, a warning is produced for titratable residues
    having -h<z<h and are buried in the protein by less than 75%. Note that the heuristic for the
    detection of membrane-exposed residues is very crude; the "buried fraction" computation
    (from propKa) is approximate; also, in the presence of cavities,
    residues may be solvent-exposed independently from their z location.


    Parameters
    ----------
    mol_in : htmd.Molecule
        the object to be optimized
    pH : float
        pH to decide titration
    verbose : int
        verbosity. Any true value switches this module's logger to DEBUG for the duration of the
        call; the value is also forwarded to propKa and pdb2pqr.
    returnDetails : bool
        whether to return just the prepared Molecule (False, default) or a molecule *and* a
        PreparationData object including computed properties
    hydrophobicThickness : float
        the thickness of the membrane in which the protein is embedded, or None if globular protein.
        Used to provide a warning about membrane-exposed residues. Values that evaluate to false
        (None, 0) disable the check.
    holdSelection : str
        (Untested, deprecated) Atom selection to be excluded from optimization.
        Only the carbon-alpha atom will be considered for the corresponding residue.
        A warning suggesting reprepare() is logged when a hold list results from it.


    Returns
    -------
    mol_out : Molecule
        the molecule titrated and optimized
    resData : PreparationData
        a table of residues with the corresponding protonation states, pKas, and other information.
        Only returned when returnDetails is True.


    Raises
    ------
    Exception
        if the PDB parser returns neither records nor errors for the temporary input file.


    Examples
    --------
    >>> tryp = Molecule('3PTB')

    >>> tryp_op, prepData = proteinPrepare(tryp, returnDetails=True)
    >>> tryp_op.write('proteinpreparation-test-main-ph-7.pdb')
    >>> prepData.data.to_excel("/tmp/tryp-report.xlsx")
    >>> prepData                                                        # doctest: +NORMALIZE_WHITESPACE
    PreparationData object about 290 residues.
    Unparametrized residue names: CA, BEN
    Please find the full info in the .data property, e.g.:
      resname  resid insertion chain       pKa protonation flipped     buried
    0     ILE     16               A       NaN         ILE     NaN        NaN
    1     VAL     17               A       NaN         VAL     NaN        NaN
    2     GLY     18               A       NaN         GLY     NaN        NaN
    3     GLY     19               A       NaN         GLY     NaN        NaN
    4     TYR     20               A  9.590845         TYR     NaN  14.642857
     . . .
    >>> x_HIE91_ND1 = tryp_op.get("coords","resid 91 and  name ND1")
    >>> x_SER93_H =   tryp_op.get("coords","resid 93 and  name H")
    >>> len(x_SER93_H) == 3
    True
    >>> np.linalg.norm(x_HIE91_ND1-x_SER93_H) < 3
    True

    >>> tryp_op = proteinPrepare(tryp, pH=1.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-1.pdb')

    >>> tryp_op = proteinPrepare(tryp, pH=14.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-14.pdb')

    >>> mol = Molecule("1r1j")
    >>> mo, prepData = proteinPrepare(mol, returnDetails=True)
    >>> prepData.missedLigands
    ['NAG', 'ZN', 'OIR']

    >>> his = prepData.data.resname == "HIS"
    >>> prepData.data[his][["resid","insertion","chain","resname","protonation"]]
         resid insertion chain resname protonation
    160    214               A     HIS         HID
    163    217               A     HIS         HID
    383    437               A     HIS         HID
    529    583               A     HIS         HID
    533    587               A     HIS         HIP
    583    637               A     HIS         HID
    627    681               A     HIS         HID
    657    711               A     HIS         HIP
    679    733               A     HIS         HID

    >>> mor = Molecule("4dkl")
    >>> mor.filter("protein and noh")
    >>> mor_opt, mor_data = proteinPrepare(mor, returnDetails=True,
    ...                                    hydrophobicThickness=32.0)
    >>> exposedRes = mor_data.data.membraneExposed
    >>> mor_data.data[exposedRes].to_excel("/tmp/mor_exposed_residues.xlsx")

    >>> im=Molecule("4bkj")
    >>> imo,imd=proteinPrepare(im,returnDetails=True)
    >>> imd.data.to_excel("/tmp/imatinib_report.xlsx")

    See Also
    --------
    :class:`htmd.builder.PreparationData.PreparationData`

    Notes
    -----
    In case of problems, exclude water and other dummy atoms.

    Features:
     - assign protonation states via propKa
     - flip residues to optimize H-bonding network
     - debump collisions
     - fill-in missing atoms, e.g. hydrogen atoms

    Unsupported/To Do/To Check:
     - ligands
     - termini
     - multiple chains
     - nucleic acids
     - coupled titrating residues
     - Disulfide bridge detection (implemented but unused)
    """

    previous_level = logger.level
    if verbose:
        logger.setLevel(logging.DEBUG)

    # The try/finally guarantees the caller's logging level is restored even
    # when the preparation fails half-way through.
    try:
        logger.debug("Starting.")

        _warnIfContainsDUM(mol_in)

        # We could transform the molecule into an internal object, but for
        # now I prefer to rely on the strange internal parser to avoid
        # hidden quirks.
        # The context manager closes (and thereby discards) the temporary
        # file on every exit path, including exceptions.
        with tempfile.NamedTemporaryFile(suffix=".pdb", mode="w+") as tmpin:
            logger.debug("Temporary file is %s", tmpin.name)
            # The molecule is written through the file *name* while the
            # handle is still open; the parser then reads via the handle.
            mol_in.write(tmpin.name)  # Not sure this is good unix

            pdblist, errlist = readPDB(tmpin)
            if len(pdblist) == 0 and len(errlist) == 0:
                raise Exception('Internal error in preparing input to pdb2pqr')

            # An ugly hack to silence non-prefixed logging messages.
            # Iterate over a snapshot: removing handlers from the list being
            # iterated would skip the entry following each removed one.
            # getattr() also tolerates handlers that have no formatter set.
            for handler in list(propka.lib.logger.handlers):
                if getattr(handler.formatter, "_fmt", None) == '%(message)s':
                    propka.lib.logger.removeHandler(handler)

            propka_opts, _ = propka.lib.loadOptions('--quiet')
            propka_opts.verbosity = verbose
            propka_opts.verbose = verbose  # Will be removed in future propKas

            # Note on naming. The behavior of PDB2PQR is controlled by two
            # parameters, ff and ffout. My understanding is that the ff
            # parameter sets which residues are SUPPORTED by the underlying
            # FF, PLUS the charge and radii.  The ffout parameter sets the
            # naming scheme. Therefore, I want ff to be as general as
            # possible, which turns out to be "parse". Then I pick a
            # convenient ffout.

            # Hold list (None -> None)
            hlist = _selToHoldList(mol_in, holdSelection)
            if hlist:
                logger.warning("The holdSelection option is untested and deprecated. Please use reprepare()")

            # Relying on defaults
            pqr_res = runPDB2PQR(pdblist,
                                 ph=pH, verbose=verbose,
                                 ff="parse", ffout="amber",
                                 ph_calc_method="propka31",
                                 ph_calc_options=propka_opts,
                                 holdList=hlist)

        # A missing key or a non-mapping result (e.g. a plain tuple) points to
        # an incompatible pdb2pqr build; other exceptions are not intercepted.
        try:
            header = pqr_res['header']
            pqr = pqr_res['lines']
            missedLigands = pqr_res['missedligands']
            pdb2pqr_protein = pqr_res['protein']
            pdb2pqr_routines = pqr_res['routines']
        except (KeyError, TypeError):
            logger.error("Problem calling pdb2pqr. Make sure you have htmd-pdb2pqr >= 2.1.2a9")
            raise

        # Diagnostics
        for missedligand in missedLigands:
            logger.warning("The following residue has not been optimized: %s", missedligand)

        mol_out, resData = _buildResAndMol(pdb2pqr_protein)

        mol_out.box = mol_in.box
        _fixupWaterNames(mol_out)

        # Misc. info
        resData.header = header
        resData.pqr = pqr

        # Return residue information
        resData.pdb2pqr_protein = pdb2pqr_protein
        resData.pdb2pqr_routines = pdb2pqr_routines
        resData.missedLigands = missedLigands

        # Store un-reprepared info
        resData.data['default_protonation'] = resData.data['protonation']

        resData._listNonStandardResidues()
        resData._warnIfpKCloseTopH(pH)
        resData.warnIfTerminiSuspect()

        if hydrophobicThickness:
            resData._setMembraneExposureAndWarn(hydrophobicThickness)

        logger.debug("Returning.")
    finally:
        logger.setLevel(previous_level)

    if returnDetails:
        return mol_out, resData
    return mol_out
Esempio n. 2
0
def proteinPrepare(mol_in,
                   pH=7.0,
                   verbose=0,
                   returnDetails=False,
                   hydrophobicThickness=None,
                   holdSelection=None):
    """A system preparation wizard for HTMD.

    Returns a Molecule object, where residues have been renamed to follow
    internal conventions on protonation (below). Coordinates are changed to
    optimize the H-bonding network. This should be roughly equivalent to mdweb and Maestro's
    preparation wizard.

    The following residue names are used in the returned molecule:

    === ===============================
    ASH Neutral ASP
    CYX SS-bonded CYS
    CYM Negative CYS
    GLH Neutral GLU
    HIP Positive HIS
    HID Neutral HIS, proton HD1 present
    HIE Neutral HIS, proton HE2 present
    LYN Neutral LYS
    TYM Negative TYR
    AR0 Neutral ARG
    === ===============================

    ========= ======= =========
    Charge +1 Neutral Charge -1
    ========= ======= =========
    -         ASH     ASP
    -         CYS     CYM
    -         GLH     GLU
    HIP       HID/HIE -
    LYS       LYN     -
    -         TYR     TYM
    ARG       AR0     -
    ========= ======= =========

    A detailed table about the residues modified is returned (as a second return value) when
    returnDetails is True (see PreparationData object).

    If hydrophobicThickness is set to a positive value 2*h, a warning is produced for titratable residues
    having -h<z<h and are buried in the protein by less than 75%. Note that the heuristic for the
    detection of membrane-exposed residues is very crude; the "buried fraction" computation
    (from propKa) is approximate; also, in the presence of cavities,
    residues may be solvent-exposed independently from their z location.


    Parameters
    ----------
    mol_in : htmd.Molecule
        the object to be optimized
    pH : float
        pH to decide titration
    verbose : int
        verbosity. Any true value switches this module's logger to DEBUG for the duration of the
        call; the value is also forwarded to propKa and pdb2pqr.
    returnDetails : bool
        whether to return just the prepared Molecule (False, default) or a molecule *and* a
        PreparationData object including computed properties
    hydrophobicThickness : float
        the thickness of the membrane in which the protein is embedded, or None if globular protein.
        Used to provide a warning about membrane-exposed residues. Values that evaluate to false
        (None, 0) disable the check.
    holdSelection : str
        (Untested, deprecated) Atom selection to be excluded from optimization.
        Only the carbon-alpha atom will be considered for the corresponding residue.
        A warning suggesting reprepare() is logged when a hold list results from it.


    Returns
    -------
    mol_out : Molecule
        the molecule titrated and optimized
    resData : PreparationData
        a table of residues with the corresponding protonation states, pKas, and other information.
        Only returned when returnDetails is True.


    Raises
    ------
    Exception
        if the PDB parser returns neither records nor errors for the temporary input file.


    Examples
    --------
    >>> tryp = Molecule('3PTB')

    >>> tryp_op, prepData = proteinPrepare(tryp, returnDetails=True)
    >>> tryp_op.write('proteinpreparation-test-main-ph-7.pdb')
    >>> prepData.data.to_excel("/tmp/tryp-report.xlsx")
    >>> prepData                                                        # doctest: +NORMALIZE_WHITESPACE
    PreparationData object about 290 residues.
    Unparametrized residue names: CA, BEN
    Please find the full info in the .data property, e.g.:
      resname  resid insertion chain       pKa protonation flipped     buried
    0     ILE     16               A       NaN         ILE     NaN        NaN
    1     VAL     17               A       NaN         VAL     NaN        NaN
    2     GLY     18               A       NaN         GLY     NaN        NaN
    3     GLY     19               A       NaN         GLY     NaN        NaN
    4     TYR     20               A  9.590845         TYR     NaN  14.642857
     . . .
    >>> x_HIE91_ND1 = tryp_op.get("coords","resid 91 and  name ND1")
    >>> x_SER93_H =   tryp_op.get("coords","resid 93 and  name H")
    >>> len(x_SER93_H) == 3
    True
    >>> np.linalg.norm(x_HIE91_ND1-x_SER93_H) < 3
    True

    >>> tryp_op = proteinPrepare(tryp, pH=1.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-1.pdb')

    >>> tryp_op = proteinPrepare(tryp, pH=14.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-14.pdb')

    >>> mol = Molecule("1r1j")
    >>> mo, prepData = proteinPrepare(mol, returnDetails=True)
    >>> prepData.missedLigands
    ['NAG', 'ZN', 'OIR']

    >>> his = prepData.data.resname == "HIS"
    >>> prepData.data[his][["resid","insertion","chain","resname","protonation"]]
         resid insertion chain resname protonation
    160    214               A     HIS         HID
    163    217               A     HIS         HID
    383    437               A     HIS         HID
    529    583               A     HIS         HID
    533    587               A     HIS         HIP
    583    637               A     HIS         HID
    627    681               A     HIS         HID
    657    711               A     HIS         HIP
    679    733               A     HIS         HID

    >>> mor = Molecule("4dkl")
    >>> mor.filter("protein and noh")
    >>> mor_opt, mor_data = proteinPrepare(mor, returnDetails=True,
    ...                                    hydrophobicThickness=32.0)
    >>> exposedRes = mor_data.data.membraneExposed
    >>> mor_data.data[exposedRes].to_excel("/tmp/mor_exposed_residues.xlsx")

    >>> im=Molecule("4bkj")
    >>> imo,imd=proteinPrepare(im,returnDetails=True)
    >>> imd.data.to_excel("/tmp/imatinib_report.xlsx")

    See Also
    --------
    :class:`htmd.builder.PreparationData.PreparationData`

    Notes
    -----
    In case of problems, exclude water and other dummy atoms.

    Features:
     - assign protonation states via propKa
     - flip residues to optimize H-bonding network
     - debump collisions
     - fill-in missing atoms, e.g. hydrogen atoms

    Unsupported/To Do/To Check:
     - ligands
     - termini
     - multiple chains
     - nucleic acids
     - coupled titrating residues
     - Disulfide bridge detection (implemented but unused)
    """

    saved_level = logger.level
    if verbose:
        logger.setLevel(logging.DEBUG)

    # Restore the caller's logging level on every exit path, including
    # failures raised while preparing the molecule.
    try:
        logger.debug("Starting.")

        _warnIfContainsDUM(mol_in)

        # We could transform the molecule into an internal object, but for
        # now I prefer to rely on the strange internal parser to avoid
        # hidden quirks.
        # Using the temporary file as a context manager ensures it is closed
        # (and removed) even if parsing or pdb2pqr raises.
        with tempfile.NamedTemporaryFile(suffix=".pdb", mode="w+") as tmpin:
            logger.debug("Temporary file is %s", tmpin.name)
            # Written through the file name while the handle stays open; the
            # parser below reads through the handle.
            mol_in.write(tmpin.name)  # Not sure this is good unix

            pdblist, errlist = readPDB(tmpin)
            if len(pdblist) == 0 and len(errlist) == 0:
                raise Exception('Internal error in preparing input to pdb2pqr')

            # An ugly hack to silence non-prefixed logging messages.
            # Work on a copy of the handler list, because removeHandler()
            # edits the original and would make the loop skip entries;
            # getattr() covers handlers without a formatter.
            for handler in list(propka.lib.logger.handlers):
                if getattr(handler.formatter, "_fmt", None) == '%(message)s':
                    propka.lib.logger.removeHandler(handler)

            propka_opts, _ = propka.lib.loadOptions('--quiet')
            propka_opts.verbosity = verbose
            propka_opts.verbose = verbose  # Will be removed in future propKas

            # Note on naming. The behavior of PDB2PQR is controlled by two
            # parameters, ff and ffout. My understanding is that the ff
            # parameter sets which residues are SUPPORTED by the underlying
            # FF, PLUS the charge and radii.  The ffout parameter sets the
            # naming scheme. Therefore, I want ff to be as general as
            # possible, which turns out to be "parse". Then I pick a
            # convenient ffout.

            # Hold list (None -> None)
            hlist = _selToHoldList(mol_in, holdSelection)
            if hlist:
                logger.warning(
                    "The holdSelection option is untested and deprecated. Please use reprepare()"
                )

            # Relying on defaults
            pqr_res = runPDB2PQR(pdblist,
                                 ph=pH,
                                 verbose=verbose,
                                 ff="parse",
                                 ffout="amber",
                                 ph_calc_method="propka31",
                                 ph_calc_options=propka_opts,
                                 holdList=hlist)

        # Only lookup failures (missing key, or a result that is not a
        # mapping) hint at an outdated pdb2pqr; anything else propagates
        # without the version message.
        try:
            header = pqr_res['header']
            pqr = pqr_res['lines']
            missedLigands = pqr_res['missedligands']
            pdb2pqr_protein = pqr_res['protein']
            pdb2pqr_routines = pqr_res['routines']
        except (KeyError, TypeError):
            logger.error(
                "Problem calling pdb2pqr. Make sure you have htmd-pdb2pqr >= 2.1.2a9"
            )
            raise

        # Diagnostics
        for missedligand in missedLigands:
            logger.warning("The following residue has not been optimized: %s",
                           missedligand)

        mol_out, resData = _buildResAndMol(pdb2pqr_protein)

        mol_out.box = mol_in.box
        _fixupWaterNames(mol_out)

        # Misc. info
        resData.header = header
        resData.pqr = pqr

        # Return residue information
        resData.pdb2pqr_protein = pdb2pqr_protein
        resData.pdb2pqr_routines = pdb2pqr_routines
        resData.missedLigands = missedLigands

        # Store un-reprepared info
        resData.data['default_protonation'] = resData.data['protonation']

        resData._listNonStandardResidues()
        resData._warnIfpKCloseTopH(pH)
        resData.warnIfTerminiSuspect()

        if hydrophobicThickness:
            resData._setMembraneExposureAndWarn(hydrophobicThickness)

        logger.debug("Returning.")
    finally:
        logger.setLevel(saved_level)

    if returnDetails:
        return mol_out, resData
    return mol_out
Esempio n. 3
0
def prepareProtein(mol_in,
                   pH=7.0,
                   verbose=0,
                   returnDetails=False,
                   hydrophobicThickness=None,
                   holdSelection=None):
    """A system preparation wizard for HTMD.

    Returns a Molecule object, where residues have been renamed to follow
    internal conventions on protonation (below). Coordinates are changed to
    optimize the H-bonding network. This should be roughly equivalent to mdweb and Maestro's
    preparation wizard.

    The following residue names are used in the returned molecule:

        ASH     Neutral ASP
        CYX     SS-bonded CYS
        CYM     Negative CYS
        GLH     Neutral GLU
        HIP     Positive HIS
        HID     Neutral HIS, proton HD1 present
        HIE     Neutral HIS, proton HE2 present
        LYN     Neutral LYS
        TYM     Negative TYR
        AR0     Neutral ARG

    If hydrophobicThickness is set to a positive value 2*h, a warning is produced for titratable residues
    having -h<z<h and are buried in the protein by less than 75%. The list of such residues can be accessed setting
    returnDetails to True. Note that the heuristic for the detection of membrane-exposed residues is very crude;
    the "buried fraction" computation (from propka) is approximate; also, in the presence of cavities,
    residues may be solvent-exposed independently from their z location.


    Parameters
    ----------
    mol_in : htmd.Molecule
        the object to be optimized
    pH : float
        pH to decide titration
    verbose : int
        verbosity. Any true value switches this module's logger to DEBUG for the duration of the
        call; the value is also forwarded to propKa and pdb2pqr.
    returnDetails : bool
        whether to return just the prepared Molecule (False, default) or a molecule *and* a ResidueData
        object including computed properties
    hydrophobicThickness : float
        the thickness of the membrane in which the protein is embedded, or None if globular protein.
        Used to provide a warning about membrane-exposed residues. Values that evaluate to false
        (None, 0) disable the check.
    holdSelection : str
        Atom selection to be excluded from optimization.
        Only the carbon-alpha atom will be considered for the corresponding residue.


    Returns
    -------
    mol_out : Molecule
        the molecule titrated and optimized
    resData : ResidueData
        a table of residues with the corresponding protonation states, pKas, and other information.
        Only returned when returnDetails is True.


    Raises
    ------
    Exception
        if the PDB parser returns neither records nor errors for the temporary input file.


    Examples
    --------
    >>> tryp = Molecule('3PTB')

    >>> tryp_op, prepData = prepareProtein(tryp, returnDetails=True)
    >>> tryp_op.write('proteinpreparation-test-main-ph-7.pdb')
    >>> prepData.data.to_excel("/tmp/tryp-report.xlsx")
    >>> prepData
    ResidueData object about 290 residues.
    Unparametrized residue names: CA, BEN
    Please find the full info in the .data property, e.g.:
      resname  resid insertion chain       pKa protonation flipped     buried
    0     ILE     16               A       NaN         ILE     NaN        NaN
    1     VAL     17               A       NaN         VAL     NaN        NaN
    2     GLY     18               A       NaN         GLY     NaN        NaN
    3     GLY     19               A       NaN         GLY     NaN        NaN
    4     TYR     20               A  9.590845         TYR     NaN  14.642857
     . . .
    >>> x_HIE91_ND1 = tryp_op.get("coords","resid 91 and  name ND1")
    >>> x_SER93_H =   tryp_op.get("coords","resid 93 and  name H")
    >>> len(x_SER93_H) == 3
    True
    >>> np.linalg.norm(x_HIE91_ND1-x_SER93_H) < 3
    True

    >>> tryp_op = prepareProtein(tryp, pH=1.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-1.pdb')

    >>> tryp_op = prepareProtein(tryp, pH=14.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-14.pdb')

    >>> mol = Molecule("1r1j")
    >>> mo, prepData = prepareProtein(mol, returnDetails=True)
    >>> prepData.missedLigands
    ['NAG', 'ZN', 'OIR']

    >>> his = prepData.data.resname == "HIS"
    >>> prepData.data[his][["resid","insertion","chain","resname","protonation"]]
         resid insertion chain resname protonation
    160    214               A     HIS         HID
    163    217               A     HIS         HID
    383    437               A     HIS         HID
    529    583               A     HIS         HID
    533    587               A     HIS         HIP
    583    637               A     HIS         HID
    627    681               A     HIS         HID
    657    711               A     HIS         HIP
    679    733               A     HIS         HID

    >>> mor = Molecule("4dkl")
    >>> mor.filter("protein and noh")
    >>> mor_opt, mor_data = prepareProtein(mor, returnDetails=True,
    ...                                    hydrophobicThickness=32.0)
    >>> exposedRes = mor_data.data.membraneExposed
    >>> mor_data.data[exposedRes].to_excel("/tmp/mor_exposed_residues.xlsx")

    >>> im=Molecule("4bkj")
    >>> imo,imd=prepareProtein(im,returnDetails=True)
    >>> imd.data.to_excel("/tmp/imatinib_report.xlsx")


    See Also
    --------
    ResidueData : the object returned as second value when returnDetails is True


    Notes
    -----
    In case of problems, exclude water and other dummy atoms.

    Features:
     - assign protonation states via propKa
     - flip residues to optimize H-bonding network
     - debump collisions
     - fill-in missing atoms, e.g. hydrogen atoms

    Unsupported/To Do/To Check:
     - ligands
     - termini
     - force residues
     - multiple chains
     - nucleic acids
     - reporting in machine-readable form
     - coupled titrating residues
     - Disulfide bridge detection (implemented but unused)

    """

    previous_level = logger.level
    if verbose:
        logger.setLevel(logging.DEBUG)

    # The try/finally guarantees the caller's logging level is restored even
    # when the preparation fails half-way through.
    try:
        logger.debug("Starting.")

        # We could transform the molecule into an internal object, but for
        # now I prefer to rely on the strange internal parser to avoid
        # hidden quirks.
        # The context manager closes (and thereby discards) the temporary
        # file on every exit path, including exceptions.
        with tempfile.NamedTemporaryFile(suffix=".pdb", mode="w+") as tmpin:
            logger.debug("Temporary file is %s", tmpin.name)
            # The molecule is written through the file *name* while the
            # handle is still open; the parser then reads via the handle.
            mol_in.write(tmpin.name)  # Not sure this is sound unix

            pdblist, errlist = readPDB(tmpin)
            if len(pdblist) == 0 and len(errlist) == 0:
                raise Exception('Internal error in preparing input to pdb2pqr')

            # We could set additional options here
            import propka.lib

            # An ugly hack to silence non-prefixed logging messages.
            # Iterate over a snapshot: removing handlers from the list being
            # iterated would skip the entry following each removed one.
            # getattr() also tolerates handlers that have no formatter set.
            for handler in list(propka.lib.logger.handlers):
                if getattr(handler.formatter, "_fmt", None) == '%(message)s':
                    propka.lib.logger.removeHandler(handler)

            propka_opts, _ = propka.lib.loadOptions('--quiet')
            propka_opts.verbosity = verbose
            propka_opts.verbose = verbose  # Will be removed in future propKas

            # Note on naming. The behavior of PDB2PQR is controlled by two
            # parameters, ff and ffout. My understanding is that the ff
            # parameter sets which residues are SUPPORTED by the underlying
            # FF, PLUS the charge and radii.  The ffout parameter sets the
            # naming scheme. Therefore, I want ff to be as general as
            # possible, which turns out to be "parse". Then I pick a
            # convenient ffout.

            # Hold list (None -> None)
            hlist = _selToHoldList(mol_in, holdSelection)

            # Relying on defaults
            header, pqr, missedLigands, pdb2pqr_protein = runPDB2PQR(pdblist,
                                                                       ph=pH, verbose=verbose,
                                                                       ff="parse", ffout="amber",
                                                                       ph_calc_method="propka31",
                                                                       ph_calc_options=propka_opts,
                                                                       holdList=hlist)

        # Diagnostics
        for missedligand in missedLigands:
            logger.warning("The following residue has not been optimized: %s", missedligand)

        # Here I parse the returned protein object and recreate a Molecule,
        # because I need to access the properties.
        logger.debug("Building Molecule object.")

        # Per-atom columns, filled in parallel and handed to _fillMolecule.
        name = []
        resid = []
        chain = []
        insertion = []
        coords = []
        resname = []
        segids = []
        elements = []

        resData = ResidueData()

        resData.header = header
        resData.pqr = pqr

        for residue in pdb2pqr_protein.residues:
            # Prefer the force-field name when the residue has a non-empty
            # one; names of four or more characters are cut to their last
            # three characters.
            ffname = getattr(residue, 'ffname', None)
            if ffname:
                curr_resname = ffname
                if len(curr_resname) >= 4:
                    curr_resname = curr_resname[-3:]
                    logger.debug("Residue %s has internal name %s, replacing with %s",
                                 residue, ffname, curr_resname)
            else:
                curr_resname = residue.name

            resData._setProtonationState(residue, curr_resname)

            # Residues without (or with empty) patches contribute nothing.
            for patch in getattr(residue, 'patches', None) or ():
                resData._appendPatches(residue, patch)
                if patch != "PEPTIDE":
                    logger.debug("Residue %s has patch %s set", residue, patch)

            # 'UNDEF' is treated the same as a missing wasFlipped attribute.
            if getattr(residue, 'wasFlipped', 'UNDEF') != 'UNDEF':
                resData._setFlipped(residue, residue.wasFlipped)

            for atom in residue.atoms:
                name.append(atom.name)
                resid.append(residue.resSeq)
                chain.append(residue.chainID)
                insertion.append(residue.iCode)
                coords.append([atom.x, atom.y, atom.z])
                resname.append(curr_resname)
                segids.append(atom.segID)
                elements.append(atom.element)

        mol_out = _fillMolecule(name, resname, chain, resid, insertion, coords, segids, elements)
        _fixupWaterNames(mol_out)

        # Return residue information
        resData._importPKAs(pdb2pqr_protein.pka_protein)
        resData.pdb2pqr_protein = pdb2pqr_protein
        resData.missedLigands = missedLigands

        resData._warnIfpKCloseTopH(pH)

        if hydrophobicThickness:
            resData._setMembraneExposureAndWarn(hydrophobicThickness)

        logger.debug("Returning.")
    finally:
        logger.setLevel(previous_level)

    if returnDetails:
        return mol_out, resData
    return mol_out
Esempio n. 4
0
def proteinPrepare(mol_in,
                   pH=7.0,
                   verbose=0,
                   returnDetails=False,
                   hydrophobicThickness=None,
                   holdSelection=None):
    """A system preparation wizard for HTMD.

    Returns a Molecule object, where residues have been renamed to follow
    internal conventions on protonation (below). Coordinates are changed to
    optimize the H-bonding network. This should be roughly equivalent to mdweb and Maestro's
    preparation wizard.

    The following residue names are used in the returned molecule:

    === ===============================
    ASH Neutral ASP
    CYX SS-bonded CYS
    CYM Negative CYS
    GLH Neutral GLU
    HIP Positive HIS
    HID Neutral HIS, proton HD1 present
    HIE Neutral HIS, proton HE2 present
    LYN Neutral LYS
    TYM Negative TYR
    AR0 Neutral ARG
    === ===============================

    ========= ======= =========
    Charge +1 Neutral Charge -1
    ========= ======= =========
    -         ASH     ASP
    -         CYS     CYM
    -         GLH     GLU
    HIP       HID/HIE -
    LYS       LYN     -
    -         TYR     TYM
    ARG       AR0     -
    ========= ======= =========

    A detailed table about the residues modified is returned (as a second return value) when
    returnDetails is True (see ResidueData object).

    If hydrophobicThickness is set to a positive value 2*h, a warning is produced for titratable residues
    having -h<z<h and are buried in the protein by less than 75%. Note that the heuristic for the
    detection of membrane-exposed residues is very crude; the "buried fraction" computation
    (from propKa) is approximate; also, in the presence of cavities,
    residues may be solvent-exposed independently from their z location.


    Notes
    -----
    In case of problems, exclude water and other dummy atoms.

    The module logger is switched to DEBUG for the duration of the call when
    `verbose` is set; its previous level is restored on exit, including when
    the preparation raises.


    Features
    --------
     - assign protonation states via propKa
     - flip residues to optimize H-bonding network
     - debump collisions
     - fill-in missing atoms, e.g. hydrogen atoms


    Parameters
    ----------
    mol_in : htmd.Molecule
        the object to be optimized
    pH : float
        pH to decide titration
    verbose : int
        verbosity
    returnDetails : bool
        whether to return just the prepared Molecule (False, default) or a molecule *and* a ResidueData
        object including computed properties
    hydrophobicThickness : float
        the thickness of the membrane in which the protein is embedded, or None if globular protein.
        Used to provide a warning about membrane-exposed residues.
    holdSelection : str
        Atom selection to be excluded from optimization.
        Only the carbon-alpha atom will be considered for the corresponding residue.


    Returns
    -------
    mol_out : Molecule
        the molecule titrated and optimized.
    resData : ResidueData
        a table of residues with the corresponding protonation states, pKas, and other information.
        Only returned when returnDetails is True.


    Raises
    ------
    Exception
        if the PDB parser returns neither records nor errors for the input molecule.


    Examples
    --------
    >>> tryp = Molecule('3PTB')

    >>> tryp_op, prepData = proteinPrepare(tryp, returnDetails=True)
    >>> tryp_op.write('proteinpreparation-test-main-ph-7.pdb')
    >>> prepData.data.to_excel("/tmp/tryp-report.xlsx")
    >>> prepData
    ResidueData object about 290 residues.
    Unparametrized residue names: CA, BEN
    Please find the full info in the .data property, e.g.:
      resname  resid insertion chain       pKa protonation flipped     buried
    0     ILE     16               A       NaN         ILE     NaN        NaN
    1     VAL     17               A       NaN         VAL     NaN        NaN
    2     GLY     18               A       NaN         GLY     NaN        NaN
    3     GLY     19               A       NaN         GLY     NaN        NaN
    4     TYR     20               A  9.590845         TYR     NaN  14.642857
     . . .
    >>> x_HIE91_ND1 = tryp_op.get("coords","resid 91 and  name ND1")
    >>> x_SER93_H =   tryp_op.get("coords","resid 93 and  name H")
    >>> len(x_SER93_H) == 3
    True
    >>> np.linalg.norm(x_HIE91_ND1-x_SER93_H) < 3
    True

    >>> tryp_op = proteinPrepare(tryp, pH=1.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-1.pdb')

    >>> tryp_op = proteinPrepare(tryp, pH=14.0)
    >>> tryp_op.write('proteinpreparation-test-main-ph-14.pdb')

    >>> mol = Molecule("1r1j")
    >>> mo, prepData = proteinPrepare(mol, returnDetails=True)
    >>> prepData.missedLigands
    ['NAG', 'ZN', 'OIR']

    >>> his = prepData.data.resname == "HIS"
    >>> prepData.data[his][["resid","insertion","chain","resname","protonation"]]
         resid insertion chain resname protonation
    160    214               A     HIS         HID
    163    217               A     HIS         HID
    383    437               A     HIS         HID
    529    583               A     HIS         HID
    533    587               A     HIS         HIP
    583    637               A     HIS         HID
    627    681               A     HIS         HID
    657    711               A     HIS         HIP
    679    733               A     HIS         HID

    >>> mor = Molecule("4dkl")
    >>> mor.filter("protein and noh")
    >>> mor_opt, mor_data = proteinPrepare(mor, returnDetails=True,
    ...                                    hydrophobicThickness=32.0)
    >>> exposedRes = mor_data.data.membraneExposed
    >>> mor_data.data[exposedRes].to_excel("/tmp/mor_exposed_residues.xlsx")

    >>> im=Molecule("4bkj")
    >>> imo,imd=proteinPrepare(im,returnDetails=True)
    >>> imd.data.to_excel("/tmp/imatinib_report.xlsx")


    See Also
    --------
    The ResidueData object.


    Unsupported/To Do/To Check
    --------------------------
     - ligands
     - termini
     - force residues
     - multiple chains
     - nucleic acids
     - coupled titrating residues
     - Disulfide bridge detection (implemented but unused)

    """

    oldLoggingLevel = logger.level
    if verbose:
        logger.setLevel(logging.DEBUG)

    # The try/finally guarantees that the caller's logging level is restored
    # even when the preparation fails half-way through.
    try:
        logger.debug("Starting.")

        _warnIfContainsDUM(mol_in)

        # We could transform the molecule into an internal object, but for
        # now I prefer to rely on the strange internal parser to avoid
        # hidden quirks.
        # The context manager closes (and thereby deletes) the temporary file
        # on every exit path, including exceptions raised by the parser or by
        # PDB2PQR.
        with tempfile.NamedTemporaryFile(suffix=".pdb", mode="w+") as tmpin:
            logger.debug("Temporary file is %s", tmpin.name)
            mol_in.write(tmpin.name)  # Not sure this is good unix

            pdblist, errlist = readPDB(tmpin)
            if len(pdblist) == 0 and len(errlist) == 0:
                raise Exception('Internal error in preparing input to pdb2pqr')

            # An ugly hack to silence non-prefixed logging messages.
            # Iterate over a copy: removeHandler() mutates the handler list,
            # and removing while iterating the live list skips entries.
            # getattr() also tolerates handlers that have no formatter set.
            for h in list(propka.lib.logger.handlers):
                if getattr(h.formatter, '_fmt', None) == '%(message)s':
                    propka.lib.logger.removeHandler(h)

            propka_opts, dummy = propka.lib.loadOptions('--quiet')
            propka_opts.verbosity = verbose
            propka_opts.verbose = verbose  # Will be removed in future propKas

            # Note on naming. The behavior of PDB2PQR is controlled by two
            # parameters, ff and ffout. My understanding is that the ff
            # parameter sets which residues are SUPPORTED by the underlying
            # FF, PLUS the charge and radii.  The ffout parameter sets the
            # naming scheme. Therefore, I want ff to be as general as
            # possible, which turns out to be "parse". Then I pick a
            # convenient ffout.

            # Hold list (None -> None)
            hlist = _selToHoldList(mol_in, holdSelection)

            # Relying on defaults
            header, pqr, missedLigands, pdb2pqr_protein = runPDB2PQR(
                pdblist,
                ph=pH,
                verbose=verbose,
                ff="parse",
                ffout="amber",
                ph_calc_method="propka31",
                ph_calc_options=propka_opts,
                holdList=hlist)

        # Diagnostics
        for missedligand in missedLigands:
            logger.warning("The following residue has not been optimized: %s",
                           missedligand)

        # Here I parse the returned protein object and recreate a Molecule,
        # because I need to access the properties.
        logger.debug("Building Molecule object.")

        # Per-atom fields, filled in lockstep (one entry per atom).
        name = []
        resid = []
        chain = []
        insertion = []
        coords = []
        resname = []
        segid = []
        element = []
        occupancy = []
        beta = []
        record = []
        charge = []

        resData = ResidueData()

        resData.header = header
        resData.pqr = pqr

        for residue in pdb2pqr_protein.residues:
            # Prefer the force-field name when one is set; names of four or
            # more characters are cut down to their last three.
            if getattr(residue, 'ffname', None):
                curr_resname = residue.ffname
                if len(curr_resname) >= 4:
                    curr_resname = curr_resname[-3:]
                    logger.debug(
                        "Residue %s has internal name %s, replacing with %s",
                        residue, residue.ffname, curr_resname)
            else:
                curr_resname = residue.name

            resData._setProtonationState(residue, curr_resname)

            if getattr(residue, 'patches', None):
                for patch in residue.patches:
                    resData._appendPatches(residue, patch)
                    if patch != "PEPTIDE":
                        logger.debug("Residue %s has patch %s set",
                                     residue, patch)

            # 'UNDEF' is the sentinel for "attribute absent"; any other value
            # (including False) is recorded.
            if getattr(residue, 'wasFlipped', 'UNDEF') != 'UNDEF':
                resData._setFlipped(residue, residue.wasFlipped)

            for atom in residue.atoms:
                # Fixup element fields for added H (routines.addHydrogens)
                if atom.added and atom.name.startswith("H"):
                    elt = "H"
                else:
                    elt = atom.element
                name.append(atom.name)
                resid.append(residue.resSeq)
                chain.append(residue.chainID)
                insertion.append(residue.iCode)
                coords.append([atom.x, atom.y, atom.z])
                resname.append(curr_resname)
                segid.append(atom.segID)
                element.append(elt)
                occupancy.append(atom.occupancy)
                beta.append(atom.tempFactor)
                charge.append(atom.charge)
                record.append(atom.type)

        mol_out = _fillMolecule(name, resname, chain, resid, insertion, coords,
                                segid, element, occupancy, beta, charge, record)
        mol_out.box = mol_in.box
        _fixupWaterNames(mol_out)

        # Return residue information
        resData._importPKAs(pdb2pqr_protein.pka_protein)
        resData.pdb2pqr_protein = pdb2pqr_protein
        resData.missedLigands = missedLigands

        resData._listNonStandardResidues()
        resData._warnIfpKCloseTopH(pH)

        if hydrophobicThickness:
            resData._setMembraneExposureAndWarn(hydrophobicThickness)

        logger.debug("Returning.")

        if returnDetails:
            return mol_out, resData
        return mol_out
    finally:
        logger.setLevel(oldLoggingLevel)