Ejemplo n.º 1
0
    def helixFromPdbs(self,
                      origin,
                      mrPdb,
                      nativePdb,
                      nativeChain,
                      dsspLog,
                      workdir=os.getcwd()):
        """This is a wrapper to generate the info and resSeqMap objects needed by score Origin"""

        mrPdbInfo = pdb_edit.get_info(mrPdb)
        nativePdbInfo = pdb_edit.get_info(nativePdb)

        assert nativeChain in nativePdbInfo.models[0].chains

        import residue_map
        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(
            refInfo=nativePdbInfo,
            refChainID=nativeChain,
            targetInfo=mrPdbInfo,
            targetChainID=mrPdbInfo.models[0].chains[
                0]  # Only 1 chain in model
        )

        data = self.scoreOrigin(origin, mrPdbInfo, nativePdbInfo, resSeqMap,
                                workdir)
        contacts = data.contacts

        return self.helixFromContacts(contacts, dsspLog)
Ejemplo n.º 2
0
def analyseModels(amoptd):
    
    # Get hold of a full model so we can do the mapping of residues
    refModelPdb = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))[0]
    
    nativePdbInfo=amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info']=refModelPdbInfo
    try:
        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo( refInfo=refModelPdbInfo,
                            refChainID=refModelPdbInfo.models[0].chains[0], # Only 1 chain in model
                            targetInfo=nativePdbInfo,
                            targetChainID=nativePdbInfo.models[0].chains[0]
                          )
        amoptd['res_seq_map'] = resSeqMap
    except Exception as e:
        logger.exception("Error calculating resSeqMap: %s" % e)
        amoptd['res_seq_map']  = None # Won't be able to calculate RIO scores

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            model_list = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list, structure_list, fastas=[amoptd['fasta']])
        except Exception as e:
            logger.exception("Unable to run TMscores: %s", e)
    else:
        raise RuntimeError("No program to calculate TMSCORES")
Ejemplo n.º 3
0
    def test_resSeqMap4(self):
        # See if we can sort out the indexing between the native and model

        nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
        modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

        nativePdbStd = "1K33_std.pdb"
        pdb_edit.standardise(nativePdb, nativePdbStd)

        nativeInfo = pdb_edit.get_info(nativePdbStd)
        modelInfo = pdb_edit.get_info(modelPdb)

        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')

        os.unlink(nativePdbStd)
Ejemplo n.º 4
0
 def test_resSeqMap4(self):
     # See if we can sort out the indexing between the native and model
     
     
     nativePdb = os.path.join(self.testfiles_dir,"1K33.pdb")
     modelPdb = os.path.join(self.testfiles_dir,"1K33_S_00000001.pdb")
     
     nativePdbStd = "1K33_std.pdb"
     pdb_edit.standardise( nativePdb, nativePdbStd )
     
     nativeInfo = pdb_edit.get_info( nativePdbStd )
     modelInfo = pdb_edit.get_info( modelPdb )
     
     resSeqMap = residue_map.residueSequenceMap( )
     resSeqMap.fromInfo( nativeInfo, 'A', modelInfo, 'A' )
     
     os.unlink( nativePdbStd )
Ejemplo n.º 5
0
 def helixFromPdbs(self, origin, mrPdb, nativePdb, nativeChain, dsspLog, workdir=os.getcwd() ):
     """This is a wrapper to generate the info and resSeqMap objects needed by score Origin"""
     
     mrPdbInfo = pdb_edit.get_info(mrPdb)
     nativePdbInfo = pdb_edit.get_info(nativePdb)
     
     assert nativeChain in nativePdbInfo.models[0].chains
     
     import residue_map
     resSeqMap = residue_map.residueSequenceMap()
     resSeqMap.fromInfo( refInfo=nativePdbInfo,
                         refChainID=nativeChain,
                         targetInfo=mrPdbInfo,
                         targetChainID=mrPdbInfo.models[0].chains[0]# Only 1 chain in model
                         )
     
     data = self.scoreOrigin(origin, mrPdbInfo, nativePdbInfo, resSeqMap, workdir)
     contacts = data.contacts
     
     return self.helixFromContacts(contacts, dsspLog )
Ejemplo n.º 6
0
def analyseModels(amoptd):

    # Get hold of a full model so we can do the mapping of residues
    refModelPdb = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))[0]

    nativePdbInfo = amoptd['native_pdb_info']

    resSeqMap = residue_map.residueSequenceMap()
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    resSeqMap.fromInfo(
        refInfo=refModelPdbInfo,
        refChainID=refModelPdbInfo.models[0].
        chains[0],  # Only 1 chain in model
        targetInfo=nativePdbInfo,
        targetChainID=nativePdbInfo.models[0].chains[0])
    amoptd['res_seq_map'] = resSeqMap
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'],
                                 wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            model_list = sorted(
                glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list,
                                                     structure_list,
                                                     fastas=[amoptd['fasta']])
        except Exception as e:
            msg = "Unable to run TMscores: {0}".format(e)
            logger.critical(msg)
    else:
        global _MAXCLUSTERER  # setting a module-level variable so need to use global keyword to it doesn't become a local variable
        _MAXCLUSTERER = maxcluster.Maxcluster(amoptd['maxcluster_exe'])
        logger.info("Analysing Rosetta models with Maxcluster")
        _MAXCLUSTERER.compareDirectory(nativePdbInfo=nativePdbInfo,
                                       resSeqMap=resSeqMap,
                                       modelsDirectory=amoptd['models_dir'],
                                       workdir=fixpath(
                                           amoptd['benchmark_dir']))
    return
Ejemplo n.º 7
0
def analyseModels(amoptd):

    # Get hold of a full model so we can do the mapping of residues
    refModelPdb = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))[0]

    nativePdbInfo = amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info'] = refModelPdbInfo
    try:
        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(
            refInfo=refModelPdbInfo,
            refChainID=refModelPdbInfo.models[0].
            chains[0],  # Only 1 chain in model
            targetInfo=nativePdbInfo,
            targetChainID=nativePdbInfo.models[0].chains[0],
        )
        amoptd['res_seq_map'] = resSeqMap
    except Exception as e:
        logger.exception("Error calculating resSeqMap: %s" % e)
        amoptd['res_seq_map'] = None  # Won't be able to calculate RIO scores

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'],
                                 wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            model_list = sorted(
                glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list,
                                                     structure_list,
                                                     fastas=[amoptd['fasta']])
        except Exception as e:
            logger.exception("Unable to run TMscores: %s", e)
    else:
        raise RuntimeError("No program to calculate TMSCORES")
Ejemplo n.º 8
0
def analyseSolution(amoptd, d, mrinfo):

    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb = None
    if d['MR_program'] == "PHASER":
        mrPdb = d['PHASER_pdbout']
        mrMTZ = d['PHASER_mtzout']
    elif d['MR_program'] == "MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program'] == "unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        #logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb,d))
        return

    # debug - copy into work directory as reforigin struggles with long pathnames
    shutil.copy(
        mrPdb,
        os.path.join(fixpath(amoptd['benchmark_dir']),
                     os.path.basename(mrPdb)))

    mrPdbInfo = pdb_edit.get_info(mrPdb)

    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()

    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical(
                "mrPdb {0} has no SHELXE_os origin shift. Calculating...".
                format(mrPdb))
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            mrOrigin = [c * -1 for c in d['SHELXE_os']]

        # Move pdb onto new origin
        originPdb = ample_util.filename_append(mrPdb,
                                               astr='offset',
                                               directory=fixpath(
                                                   amoptd['benchmark_dir']))
        #print(mrPdb, originPdb, mrOrigin)
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)

        # offset.pdb is the mrModel shifted onto the new origin use csymmatch to wrap onto native
        csymmatch.Csymmatch().wrapModelToNative(
            originPdb,
            amoptd['native_pdb'],
            csymmatchPdb=os.path.join(
                fixpath(amoptd['benchmark_dir']),
                "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])))
        # can now delete origin pdb
        os.unlink(originPdb)

        # Calculate phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb, e))

        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full initial model
        # to compare to the native to allow us to determine which parts of the ensemble correspond to which parts of
        # the native structure.
        if not (amoptd['homologs'] or \
                amoptd['ideal_helices'] or \
                amoptd['import_ensembles'] or \
                amoptd['single_model_mode']):

            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(nativePdbInfo=amoptd['native_pdb_info'],
                               placedPdbInfo=mrPdbInfo,
                               refModelPdbInfo=amoptd['ref_model_pdb_info'],
                               cAlphaOnly=True,
                               workdir=fixpath(amoptd['benchmark_dir']))
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999

            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(
                mrOrigin,
                mrPdbInfo=mrPdbInfo,
                nativePdbInfo=amoptd['native_pdb_info'],
                resSeqMap=amoptd['res_seq_map'],
                workdir=fixpath(amoptd['benchmark_dir']))

            # Set attributes
            d['AA_num_contacts'] = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register'] = rioData.rioInRegister
            d['RIO_oo_register'] = rioData.rioOoRegister
            d['RIO_backwards'] = rioData.rioBackwards
            d['RIO'] = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat'] = rioData.rioNumContacts - (rioData.rioInRegister +
                                                        rioData.rioOoRegister)
            d['RIO_norm'] = float(d['RIO']) / float(
                d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts'] = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register'] = None
            d['RIO_oo_register'] = None
            d['RIO_backwards'] = None
            d['RIO'] = None
            d['RIO_no_cat'] = None
            d['RIO_norm'] = None

    #     # Now get the helix
    #     helixSequence = contacts.Rio().helixFromContacts( contacts=rioData.contacts,
    #                                                            dsspLog=dsspLog )
    #     if helixSequence is not None:
    #         ampleResult.rioHelixSequence = helixSequence
    #         ampleResult.rioLenHelix      = len( helixSequence )
    #         hfile = os.path.join( workdir, "{0}.helix".format( ampleResult.ensembleName ) )
    #         with open( hfile, 'w' ) as f:
    #             f.write( helixSequence+"\n" )

    #
    # This purely for checking and so we have pdbs to view
    #
    # Wrap shelxe trace onto native using Csymmatch
        if not d['SHELXE_pdbout'] is None and os.path.isfile(
                fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative(
                fixpath(d['SHELXE_pdbout']),
                amoptd['native_pdb'],
                origin=mrOrigin,
                workdir=fixpath(amoptd['benchmark_dir']))

        if not ('SHELXE_wMPE' in d and d['SHELXE_wMPE']):
            try:
                mrinfo.analyse(d['SHELXE_pdbout'])
                d['SHELXE_MPE'] = mrinfo.MPE
                d['SHELXE_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical(
                    "Error analysing SHELXE_pdbout: {0}\n{1}".format(
                        d['SHELXE_pdbout'], e))

        # Wrap parse_buccaneer model onto native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(
                d['SXRBUCC_pdbout'])):
            # Need to rename Pdb as is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(
                fixpath(amoptd['benchmark_dir']),
                "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(
                fixpath(d['SXRBUCC_pdbout']),
                amoptd['native_pdb'],
                origin=mrOrigin,
                csymmatchPdb=csymmatchPdb,
                workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical(
                    "Error analysing SXRBUCC_pdbout: {0}\n{1}".format(
                        d['SXRBUCC_pdbout'], e))

        # Wrap parse_buccaneer model onto native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Need to rename Pdb as is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(
                fixpath(amoptd['benchmark_dir']),
                "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(
                fixpath(d['SXRARP_pdbout']),
                amoptd['native_pdb'],
                origin=mrOrigin,
                csymmatchPdb=csymmatchPdb,
                workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical(
                    "Error analysing SXRARP_pdbout: {0}\n{1}".format(
                        d['SXRARP_pdbout'], e))
    return
Ejemplo n.º 9
0
def analysePdb(amoptd):
    """Collect data on the native pdb structure"""

    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # number atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this spaceGroup
    try:
        originInfo = pdb_model.OriginInfo(
            spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        originInfo = None

    # Do this here as a bug in pdbcur can knacker the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd[
        'native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    if not originInfo:
        space_group = "P1"
    else:
        space_group = originInfo.spaceGroup()
    amoptd['native_pdb_space_group'] = space_group
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues

    # First check if the native has > 1 model and extract the first if so
    if len(nativePdbInfo.models) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append(filename=nativePdb,
                                                astr="model1",
                                                directory=fixpath(
                                                    amoptd['work_dir']))
        pdb_edit.extract_model(nativePdb,
                               nativePdb1,
                               modelID=nativePdbInfo.models[0].serial)
        nativePdb = nativePdb1

    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    nativePdbStd = ample_util.filename_append(filename=nativePdb,
                                              astr="std",
                                              directory=fixpath(
                                                  amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd

    # Get the new Info about the native
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # For maxcluster comparsion of shelxe model we need a single chain from the native so we get this here
    if len(nativePdbInfo.models[0].chains) > 1:
        chainID = nativePdbInfo.models[0].chains[0]
        nativeChain1 = ample_util.filename_append(filename=nativePdbInfo.pdb,
                                                  astr="chain1",
                                                  directory=fixpath(
                                                      amoptd['work_dir']))
        pdb_edit.to_single_chain(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb

    # Additional data
    amoptd['native_pdb_num_chains'] = len(nativePdbInfo.models[0].chains)
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo

    return
Ejemplo n.º 10
0
def analyseSolution(amoptd, d, mrinfo):

    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb=None
    if d['MR_program']=="PHASER":
        mrPdb = d['PHASER_pdbout']
        mrMTZ = d['PHASER_mtzout']
    elif d['MR_program']=="MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program']=="unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        #logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb,d))
        return

    # debug - copy into work directory as reforigin struggles with long pathnames
    shutil.copy(mrPdb, os.path.join(fixpath(amoptd['benchmark_dir']), os.path.basename(mrPdb)))
    
    mrPdbInfo = pdb_edit.get_info( mrPdb )
    
    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()
    
    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical("mrPdb {0} has no SHELXE_os origin shift. Calculating...".format(mrPdb))
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            mrOrigin=[c*-1 for c in d['SHELXE_os']]
        
        # Move pdb onto new origin
        originPdb = ample_util.filename_append(mrPdb, astr='offset',directory=fixpath(amoptd['benchmark_dir']))
        #print(mrPdb, originPdb, mrOrigin)
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)
        
        # offset.pdb is the mrModel shifted onto the new origin use csymmatch to wrap onto native
        csymmatch.Csymmatch().wrapModelToNative(originPdb,
                                                amoptd['native_pdb'],
                                                csymmatchPdb=os.path.join(fixpath(amoptd['benchmark_dir']),
                                                "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])))
        # can now delete origin pdb
        os.unlink(originPdb)
        
        # Calculate phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb,e))         
    
        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full initial model
        # to compare to the native to allow us to determine which parts of the ensemble correspond to which parts of 
        # the native structure - or if we were unable to calculate a res_seq_map
        if not (amoptd['homologs'] or \
                amoptd['ideal_helices'] or \
                amoptd['import_ensembles'] or \
                amoptd['single_model_mode'] or \
                amoptd['res_seq_map']):
    
            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(nativePdbInfo=amoptd['native_pdb_info'],
                               placedPdbInfo=mrPdbInfo,
                               refModelPdbInfo=amoptd['ref_model_pdb_info'],
                               cAlphaOnly=True,
                               workdir=fixpath(amoptd['benchmark_dir']))
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999
    
    
            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(mrOrigin,
                                          mrPdbInfo=mrPdbInfo,
                                          nativePdbInfo=amoptd['native_pdb_info'],
                                          resSeqMap=amoptd['res_seq_map'],
                                          workdir=fixpath(amoptd['benchmark_dir'])
                                          )
        
            # Set attributes
            d['AA_num_contacts']  = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register']  = rioData.rioInRegister
            d['RIO_oo_register']  = rioData.rioOoRegister
            d['RIO_backwards']    = rioData.rioBackwards
            d['RIO']              = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat']       = rioData.rioNumContacts - ( rioData.rioInRegister + rioData.rioOoRegister )
            d['RIO_norm']         = float(d['RIO']) / float(d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts']  = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register']  = None
            d['RIO_oo_register']  = None
            d['RIO_backwards']    = None
            d['RIO']              = None
            d['RIO_no_cat']       = None
            d['RIO_norm']         = None
    
    #     # Now get the helix
    #     helixSequence = contacts.Rio().helixFromContacts( contacts=rioData.contacts,
    #                                                            dsspLog=dsspLog )
    #     if helixSequence is not None:
    #         ampleResult.rioHelixSequence = helixSequence
    #         ampleResult.rioLenHelix      = len( helixSequence )
    #         hfile = os.path.join( workdir, "{0}.helix".format( ampleResult.ensembleName ) )
    #         with open( hfile, 'w' ) as f:
    #             f.write( helixSequence+"\n" )
    
        #
        # This purely for checking and so we have pdbs to view
        # 
        # Wrap shelxe trace onto native using Csymmatch
        if not d['SHELXE_pdbout'] is None and os.path.isfile(fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative( fixpath(d['SHELXE_pdbout']),
                                                     amoptd['native_pdb'],
                                                     origin=mrOrigin,
                                                     workdir=fixpath(amoptd['benchmark_dir']))

        if not('SHELXE_wMPE' in d and d['SHELXE_wMPE']):
            try:
                mrinfo.analyse(d['SHELXE_pdbout'])
                d['SHELXE_MPE'] = mrinfo.MPE
                d['SHELXE_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SHELXE_pdbout: {0}\n{1}".format(d['SHELXE_pdbout'],e))    
                
        # Wrap parse_buccaneer model onto native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(d['SXRBUCC_pdbout'])):
            # Need to rename Pdb as is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']), "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))
    
            csymmatch.Csymmatch().wrapModelToNative( fixpath(d['SXRBUCC_pdbout']),
                                                     amoptd['native_pdb'],
                                                     origin=mrOrigin,
                                                     csymmatchPdb=csymmatchPdb,
                                                     workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRBUCC_pdbout: {0}\n{1}".format(d['SXRBUCC_pdbout'],e))
                            
        # Wrap parse_buccaneer model onto native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Need to rename Pdb as is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']), "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))
    
            csymmatch.Csymmatch().wrapModelToNative( fixpath(d['SXRARP_pdbout']),
                                                     amoptd['native_pdb'],
                                                     origin=mrOrigin,
                                                     csymmatchPdb=csymmatchPdb,
                                                     workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRARP_pdbout: {0}\n{1}".format(d['SXRARP_pdbout'],e))
    return
Ejemplo n.º 11
0
def analysePdb(amoptd):
    """Collect data on the native pdb structure"""
    
    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)
    
    # number atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this spaceGroup
    try:
        originInfo = pdb_model.OriginInfo(spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        originInfo = None

    # Do this here as a bug in pdbcur can knacker the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd['native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    if not originInfo:
        space_group = "P1"
    else:
        space_group = originInfo.spaceGroup()
    amoptd['native_pdb_space_group'] = space_group
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues
    
    # First check if the native has > 1 model and extract the first if so
    if len( nativePdbInfo.models ) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append( filename=nativePdb, astr="model1", directory=fixpath(amoptd['work_dir']))
        pdb_edit.extract_model( nativePdb, nativePdb1, modelID=nativePdbInfo.models[0].serial )
        nativePdb = nativePdb1
        
    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    nativePdbStd = ample_util.filename_append( filename=nativePdb, astr="std", directory=fixpath(amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd
    
    # Get the new Info about the native
    nativePdbInfo = pdb_edit.get_info( nativePdb )
    
    # For comparsion of shelxe model we need a single chain from the native so we get this here
    if len( nativePdbInfo.models[0].chains ) > 1:
        nativeChain1  = ample_util.filename_append( filename=nativePdbInfo.pdb,
                                                       astr="chain1", 
                                                       directory=fixpath(amoptd['work_dir']))
        pdb_edit.merge_chains(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb
    
    # Additional data
    amoptd['native_pdb_num_chains'] = len( nativePdbInfo.models[0].chains )
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo
    
    return