Example #1
def generateMap( mtz, pdb, FP='FP', SIGFP='SIGFP', FREE='FREE', directory=None ):
    """Generate a map from an mtz file and a pdb using reforigin"""
    
    assert os.path.isfile( mtz ) and os.path.isfile( pdb ), "Cannot find files: {0} {1}".format( mtz, pdb )
    
    if not directory:
        directory = os.getcwd()
    
    mapFile = ample_util.filename_append( filename=mtz, astr="map", directory=directory )
    mapFile = os.path.abspath(mapFile)
    mapPdb = ample_util.filename_append( filename=pdb, astr="map", directory=directory )

    cmd = [ "refmac5", "HKLIN", mtz, "HKLOUT", mapFile, "XYZIN", pdb, "XYZOUT", mapPdb ]
    # FIX FOR DIFFERENT FP etc.     
    stdin ="""RIDG DIST SIGM 0.02
LABIN FP={0} SIGFP={1} FREE={2}
MAKE HYDR N
WEIGHT MATRIX 0.01
NCYC 0
END
""".format( FP, SIGFP, FREE )
    logfile = os.path.join(directory, "generateMap.log")
    ret = ample_util.run_command(cmd=cmd, logfile=logfile, dolog=True, stdin=stdin)
    
    assert ret == 0, "generateMap refmac failed-check log: {0}".format(logfile)

    return mapFile
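
Every example on this page derives its output file names through ample_util.filename_append, but the helper itself never appears. The sketch below is reconstructed purely from the call sites (filename/astr/directory keywords, with the marker string inserted between the file stem and its suffix); it is an assumption about the behaviour, not the shipped implementation.

import os

def filename_append(filename=None, astr=None, directory=None):
    """Sketch only (assumed behaviour): insert "_<astr>" between the stem and
    suffix of filename, optionally placing the result in another directory."""
    dirname, fname = os.path.split(filename)
    stem, suffix = os.path.splitext(fname)
    if directory is None:
        directory = dirname
    return os.path.join(directory, "{0}_{1}{2}".format(stem, astr, suffix))

# e.g. filename_append(filename="input.mtz", astr="map", directory="/tmp")
# would return "/tmp/input_map.mtz"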
Example #2
 def wrapModelToNative(self,
                       mrPdb,
                       nativePdb,
                       origin=[0.0,0.0,0.0],
                       csymmatchPdb=None,
                       workdir=None,
                       cleanup=True):
     """Take a pdb and wrap it onto the nativePdb using csymmatch.
     If origin is not [0.0,0.0,0.0] we also move the structure onto the new origin before wrapping"""
     
     if workdir is None: 
         workdir = os.getcwd()
     
     assert os.path.isfile(mrPdb) and os.path.isfile(nativePdb),"Cannot find: {0} or {1}".format(mrPdb,nativePdb)
     
     originMrPdb = None
     if origin != [ 0.0, 0.0, 0.0 ]:
         ostr="o{}_{}".format(origin, str(uuid.uuid1())).replace(" ","" )
         originMrPdb = ample_util.filename_append(filename=mrPdb, astr=ostr, directory=workdir)
         pdb_edit.translate(inpdb=mrPdb, outpdb=originMrPdb, ftranslate=origin)
         mrPdb = originMrPdb
     
     if csymmatchPdb is None:
         csymmatchPdb = ample_util.filename_append(filename=mrPdb, astr="csymmatch_{}".format(str(uuid.uuid1())), 
                                                   directory=workdir)
     
     self.run(refPdb=nativePdb, inPdb=mrPdb, outPdb=csymmatchPdb, originHand=False, cleanup=cleanup)
     
     if not os.path.isfile( csymmatchPdb ): 
         raise RuntimeError("Error generating csymmatchPdb")
     
     if cleanup and originMrPdb: 
         os.unlink(originMrPdb)
            
     return csymmatchPdb
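
A usage sketch for wrapModelToNative. The Csymmatch class is instantiated the same way in Examples #11 and #16; the file names and origin below are placeholders, not real data.

# Hypothetical call: shift an MR solution by half a cell along x, then wrap it
# onto the native structure (placeholder file names).
cs = csymmatch.Csymmatch()
wrapped = cs.wrapModelToNative("mr_solution.pdb",
                               "native.pdb",
                               origin=[0.5, 0.0, 0.0],
                               workdir="/tmp/bench")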
Example #3
    def scoreOrigin(self,
                    origin=None,
                    mrPdbInfo=None,
                    nativePdbInfo=None,
                    resSeqMap=None,
                    workdir=None):

        # Default the working directory at call time, not at definition time
        self.workdir = workdir if workdir is not None else os.getcwd()
        if not resSeqMap.resSeqMatch():
            # We need to create a copy of the placed pdb with numbering matching the native
            mrPdbRes = ample_util.filename_append( filename=mrPdbInfo.pdb, astr="reseq", directory=self.workdir )
            pdb_edit.match_resseq( targetPdb=mrPdbInfo.pdb, sourcePdb=None, outPdb=mrPdbRes, resMap=resSeqMap )
            mrPdb = mrPdbRes
        else:
            mrPdb = mrPdbInfo.pdb
 
        # Make a copy of mrPdb with chains renamed to lower case
        ucChains = mrPdbInfo.models[0].chains
        toChains = [ c.lower() for c in ucChains ]
        mrAaPdb = ample_util.filename_append( filename=mrPdb, astr="ren", directory=self.workdir )
        pdb_edit.rename_chains( inpdb=mrPdb, outpdb=mrAaPdb, fromChain=ucChains, toChain=toChains )

        # The list of chains in the native that we will be checking contacts from
        fromChains = nativePdbInfo.models[0].chains
        
        mrOriginPdb =  mrAaPdb
        if origin != [ 0.0, 0.0, 0.0 ]:
            # Move pdb to new origin
            #ostr="origin{0}".format(i)
            ostr="o{0}".format( origin ).replace(" ","" )
            mrOriginPdb = ample_util.filename_append( filename=mrAaPdb, astr=ostr, directory=self.workdir )
            pdb_edit.translate( inpdb=mrAaPdb, outpdb=mrOriginPdb, ftranslate=origin )
        
        # Concatenate into one file
        joinedPdb = ample_util.filename_append( filename=mrOriginPdb, astr="joined", directory=self.workdir )
        pdb_edit.merge( pdb1=nativePdbInfo.pdb, pdb2=mrOriginPdb, pdbout=joinedPdb )
            
        # Run ncont
        data = RioData()
        data.origin = origin
        data.originPdb = mrOriginPdb
        data.joinedPdb = joinedPdb
        data.fromChains = fromChains
        data.toChains = toChains

        # First get AllAtom score        
        self.calcAllAtom( data )
        
        # Then score RIO
        self.calcRio( data )
        #data.numGood = data.inregister + data.ooregister
        
        # clean up
        os.unlink(mrOriginPdb)
        os.unlink(joinedPdb)
        if os.path.isfile(mrAaPdb): os.unlink(mrAaPdb)
        
        return data
Example #4
 def process_models(self, models, out_dir, strip_oxt=False, prefix="scwrl"):
     logger.info('Adding sidechains with SCWRL to models')
     out_pdbs = []
     for pdb in models:
         out_pdbs.append(self.add_sidechains(pdbin=pdb,
                                             pdbout=ample_util.filename_append(pdb, prefix, directory=out_dir),
                                             strip_oxt=strip_oxt))
     logger.info('Processed {0} models with SCWRL into directory: {1}'.format(len(out_pdbs), out_dir))
     return out_pdbs
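
Assuming the filename_append behaviour sketched after Example #1, the default prefix maps each input model to a "_scwrl" variant in out_dir. A hypothetical call follows; the Scwrl wrapper class name and its no-argument constructor are assumptions, as neither is shown on this page.

scwrl = Scwrl()  # hypothetical constructor
rebuilt = scwrl.process_models(["model_1.pdb", "model_2.pdb"], out_dir="scwrl_out")
# rebuilt would be ["scwrl_out/model_1_scwrl.pdb", "scwrl_out/model_2_scwrl.pdb"]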
Example #5
def model_core_from_fasta(models, alignment_file, work_dir=None, case_sensitive=False):
    if not os.path.isdir(work_dir): os.mkdir(work_dir)
    
    # Read in the alignment to get the sequences
    align_seq = sequence_util.Sequence(fasta=alignment_file)
    
    # TODO: check that all the aligned sequences are the same length
    
    # Get pdb names from alignment headers
    seq_names = [ h[1:].strip() for h in align_seq.headers ]
    
    # Need to check if the alignment file is from gesamt, in which case, the names have the
    # chain names in brackets appended
    for i, s in enumerate(seq_names):
        x = re.search(r"\([a-zA-Z]*\)$", s)
        if x: seq_names[i] = s.replace(x.group(0), "")
    
    # Get array specifying which positions are core. If the positions all align, then there
    # will be a capital letter for the residue. Gaps are signified by "-" and non-structurally-
    # aligned residues by lower-case letters
    GAP = '-'
    # We can't use the case-sensitive test below by default, as Theseus ignores lower-case letters in the alignment
    if case_sensitive:
        core = [ all([ x in pdb_edit.one2three for x in t ]) for t in zip(*align_seq.sequences) ]
    else:
        core = [ all([ x != GAP for x in t ]) for t in zip(*align_seq.sequences) ]

    if not any(core): raise RuntimeError("Cannot generate core for models: {0}".format(models))
    
    # For each sequence, get a list of which positions are core
    core_positions = []
    for seq in align_seq.sequences:
        p = []
        count = 0
        for i, pos in enumerate(seq):
            if pos != GAP:
                if core[i]: p.append(count)
                count += 1
        core_positions.append(p)
        
    # TODO: check that the sequence lengths match the number of amino acids in the pdbs
        
    # Create dict mapping seq_names to core positions
    core_dict = dict((s, core_positions[i]) for i, s in enumerate(seq_names))
    
    # Cut the models down to core
    core_models = []
    for m in models:
        name = os.path.basename(m)
        pdbout = ample_util.filename_append(m, astr='core', directory=work_dir)
        pdb_edit.select_residues(m, pdbout, tokeep_idx=core_dict[name])
        core_models.append(pdbout)
        
    return core_models
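
The core test above is a per-column scan of the alignment. A toy illustration of the default (case-insensitive) branch, which keeps only the columns that have no gap in any sequence:

# Toy data, not AMPLE code: three aligned sequences of length four.
GAP = '-'
sequences = ["AB-D", "ABCD", "A-CD"]
core = [all(x != GAP for x in col) for col in zip(*sequences)]
# core == [True, False, False, True]: only columns 0 and 3 are aligned in all models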
Example #6
def model_core_from_theseus(models, alignment_file, var_by_res, work_dir=None):
    """
    Only residues from the first protein are listed in the theseus output, but then not even all of them
    
    We assume the output is based on the original alignment so that where each residue in the first protein 
    lines up with either another residue in one of the other proteins or a gap
    
    SO - we need to go through the theseus data and for each residue that is core find the corresponding residues 
    in the other proteins
    
    We use the resSeq numbers to match the residues across the alignment
    """
    if not os.path.isdir(work_dir): os.mkdir(work_dir)

    seqalign = sequence_util.Sequence(fasta=alignment_file)

    # We now need to add the list of pdbs, chains and resSeqs of the other models to the Sequence object
    for m in models: seqalign.add_pdb_data(m)
    
    # Sanity check that the names of the pdb files match those from the fasta header
    # Format is expected to be: '>1ujb.pdb(A)'
    names = [ h[1:].split('(')[0] for h in seqalign.headers ]
    if seqalign.pdbs != names:
        raise RuntimeError("headers and names of pdb files do not match!\n{0}\n{1}".format(seqalign.pdbs, names))
    
    # Get the name of the first pdb that the alignment is based on
    first = seqalign.pdbs[0]
    
    # Dictionary mapping model pdb to resSeqs that are core
    model2core = {}
    for p in seqalign.pdbs: model2core[p] = [] # initialise
    
    # Get list of core resSeqs in the first sequence
    model2core[first] = [ x.resSeq for x in var_by_res if x.core ]
    
    # Now go through the first sequence and get the resSeqs of the corresponding core for the other models
    pointer = 0 # Tracks where we are in the first sequence
    for i, resSeq in enumerate(seqalign.resseqs[0]):
        if model2core[first][pointer] == resSeq:
            # Core residue in first sequence so append the corresponding resSeqs for the other proteins
            for j, pdb in enumerate(seqalign.pdbs[1:]):
                model2core[pdb].append(seqalign.resseqs[j+1][i])
            pointer += 1
            if pointer >= len(model2core[first]): break
            
    core_models = []
    for m in models:
        name = os.path.basename(m)
        pdbout = ample_util.filename_append(m, astr='core', directory=work_dir)
        pdb_edit.select_residues(m, pdbout, tokeep=model2core[name])
        core_models.append(pdbout)
        
    return core_models
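
A toy illustration of the pointer walk above, not AMPLE code. Here resseqs holds the per-model residue numbers at each alignment column, with None standing in for a gap (an assumption about how the Sequence object stores gaps), and core_first lists the core resSeqs of the first model.

resseqs = [[10, 11, 12, 13],
           [20, 21, None, 23]]
core_first = [11, 13]

second_core = []
pointer = 0  # tracks where we are in core_first
for i, resseq in enumerate(resseqs[0]):
    if core_first[pointer] == resseq:
        second_core.append(resseqs[1][i])
        pointer += 1
        if pointer >= len(core_first):
            break
# second_core == [21, 23]: the second model's residues that line up with the core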
Example #7
 def truncate_models(self,
                     models,
                     max_cluster_size=200,
                     truncation_method=None,
                     percent_truncation=None,
                     percent_fixed_intervals=None,
                     truncation_pruning=None,
                     residue_scores=None,
                     homologs=False,
                     alignment_file=None,
                     work_dir=None):
     """Generate a set of Truncation objects, referencing a set of truncated models generated from the supplied models"""
     truncations = self.calculate_truncations(
         models=models,
         truncation_method=truncation_method,
         percent_truncation=percent_truncation,
         percent_fixed_intervals=percent_fixed_intervals,
         truncation_pruning=truncation_pruning,
         residue_scores=residue_scores,
         alignment_file=alignment_file,
         homologs=homologs)
     if truncations is None or len(truncations) < 1:
         logger.critical("Unable to truncate the ensembles - no viable truncations")
         return []
     # Loop through the Truncation objects, truncating the models based on the truncation data and adding
     # the truncated models to the Truncation.models attribute
     for truncation in truncations:
         truncation.directory = os.path.join(self.work_dir, 'tlevel_{0}'.format(truncation.level))
         os.mkdir(truncation.directory)
         logger.info('Truncating at: %s in directory %s', truncation.level, truncation.directory)
         truncation.models = []
          # Loop through the PDB files and create new ones that only contain the residues left after truncation
          for infile in self.models:
              pdbout = ample_util.filename_append(infile, str(truncation.level), directory=truncation.directory)
              pdb_edit.select_residues(pdbin=infile, pdbout=pdbout, tokeep_idx=truncation.residues_idxs)
             truncation.models.append(pdbout)
     self.truncations = truncations
     return truncations
Example #8
    def generate_ensembles(self,
                           models,
                           alignment_file=None,
                           homolog_aligner=None,
                           percent_fixed_intervals=None,
                           percent_truncation=None,
                           side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                           truncation_method=None,
                           **kwargs):
        
        if not percent_truncation:
            percent_truncation = self.percent_truncation
        if not truncation_method:
            truncation_method = self.truncation_method
        
        if not models:
            msg = "Cannot find any models for ensembling!"
            raise RuntimeError(msg) 
        if not all([os.path.isfile(m) for m in models]):
            msg = "Problem reading models given to Ensembler: {0}".format(models)
            raise RuntimeError(msg)
        
        logger.info('Ensembling models in directory: %s', self.work_dir)
    
        # Create final ensembles directory
        if not os.path.isdir(self.ensembles_directory):
            os.mkdir(self.ensembles_directory)
        
        # standardise all the models
        std_models_dir = os.path.join(self.work_dir, "std_models")
        os.mkdir(std_models_dir)
        std_models = []
        for m in models:
            std_model = ample_util.filename_append(m, 'std', std_models_dir)
            pdb_edit.standardise(pdbin=m, pdbout=std_model, del_hetatm=True)
            std_models.append(std_model)
        
        # Get a structural alignment between the different models
        if not alignment_file:
            if homolog_aligner == 'mustang':
                logger.info("Generating alignment file with mustang_exe: %s", self.mustang_exe)
                alignment_file = align_mustang(std_models, mustang_exe=self.mustang_exe, work_dir=self.work_dir)
            elif homolog_aligner == 'gesamt':
                logger.info("Generating alignment file with gesamt_exe: %s", self.gesamt_exe)
                alignment_file = align_gesamt(std_models, gesamt_exe=self.gesamt_exe, work_dir=self.work_dir)
            else:
                msg = "Unknown homolog_aligner: {0}".format(homolog_aligner)
                raise RuntimeError(msg)
            logger.info("Generated alignment file: %s", alignment_file)
        else:
            logger.info("Using alignment file: %s", alignment_file)
        
        truncate_dir = os.path.join(self.work_dir,"homolog_truncate")
        if not os.path.isdir(truncate_dir): os.mkdir(truncate_dir)
            
        # Now truncate and create ensembles - as standard ample, but with no subclustering
        self.ensembles = []
        self.truncator = truncation_util.Truncator(work_dir=truncate_dir)
        self.truncator.theseus_exe = self.theseus_exe
        for truncation in self.truncator.truncate_models(models=std_models,
                                                         truncation_method=truncation_method,
                                                         percent_fixed_intervals=percent_fixed_intervals,
                                                         percent_truncation=percent_truncation,
                                                         truncation_pruning=None,
                                                         homologs=True,
                                                         alignment_file=alignment_file):
            ensemble_dir = os.path.join(truncation.directory, "ensemble_{0}".format(truncation.level))
            os.mkdir(ensemble_dir)
            os.chdir(ensemble_dir)
             
            # Need to create an alignment file for theseus
            basename = "e{0}".format(truncation.level)
            superposed_models = self.superpose_models(truncation.models, basename=basename, work_dir=ensemble_dir, homologs=True)
            if not superposed_models:
                logger.critical("Skipping ensemble %s due to error with Theseus", basename)
                continue
            
            # Create Ensemble object
            pre_ensemble = _ensembler.Ensemble()
            pre_ensemble.num_residues = truncation.num_residues
            pre_ensemble.truncation_dir = truncation.directory
            pre_ensemble.truncation_level = truncation.level
            pre_ensemble.truncation_method = truncation.method
            pre_ensemble.truncation_percent = truncation.percent
            pre_ensemble.truncation_residues = truncation.residues
            pre_ensemble.truncation_variance = truncation.variances
            pre_ensemble.pdb = superposed_models

            for ensemble in self.edit_side_chains(pre_ensemble,
                                                  side_chain_treatments,
                                                  homologs=True):
                self.ensembles.append(ensemble)
                
        return self.ensembles
Example #9
def analysePdb(amoptd):
    """Collect data on the native pdb structure"""

    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # number atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this spaceGroup
    try:
        originInfo = pdb_model.OriginInfo(
            spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        originInfo = None

    # Do this here as a bug in pdbcur can corrupt the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd['native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    if not originInfo:
        space_group = "P1"
    else:
        space_group = originInfo.spaceGroup()
    amoptd['native_pdb_space_group'] = space_group
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues

    # First check if the native has > 1 model and extract the first if so
    if len(nativePdbInfo.models) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append(filename=nativePdb,
                                                astr="model1",
                                                directory=fixpath(
                                                    amoptd['work_dir']))
        pdb_edit.extract_model(nativePdb,
                               nativePdb1,
                               modelID=nativePdbInfo.models[0].serial)
        nativePdb = nativePdb1

    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    nativePdbStd = ample_util.filename_append(filename=nativePdb,
                                              astr="std",
                                              directory=fixpath(
                                                  amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd

    # Get the new Info about the native
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # For maxcluster comparison of the shelxe model we need a single chain from the native, so we get this here
    if len(nativePdbInfo.models[0].chains) > 1:
        nativeChain1 = ample_util.filename_append(filename=nativePdbInfo.pdb,
                                                  astr="chain1",
                                                  directory=fixpath(
                                                      amoptd['work_dir']))
        pdb_edit.to_single_chain(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb

    # Additional data
    amoptd['native_pdb_num_chains'] = len(nativePdbInfo.models[0].chains)
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo

    return
Example #10
def model_core_from_fasta(models,
                          alignment_file,
                          work_dir=None,
                          case_sensitive=False):
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)

    # Read in the alignment to get the sequences
    align_seq = sequence_util.Sequence(fasta=alignment_file)

    # TODO: check that all the aligned sequences are the same length

    # Get pdb names from alignment headers
    seq_names = [h[1:].strip() for h in align_seq.headers]

    # Need to check if the alignment file is from gesamt, in which case, the names have the
    # chain names in brackets appended
    for i, s in enumerate(seq_names):
        x = re.search(r"\([a-zA-Z]*\)$", s)
        if x:
            seq_names[i] = s.replace(x.group(0), "")

    # Get array specifying which positions are core. If the positions all align, then there
    # will be a capital letter for the residue. Gaps are signified by "-" and non-structurally-
    # aligned residues by lower-case letters
    GAP = '-'
    # We can't use the case-sensitive test below by default, as Theseus ignores lower-case letters in the alignment
    if case_sensitive:
        core = [
            all([x in ample_util.one2three for x in t])
            for t in zip(*align_seq.sequences)
        ]
    else:
        core = [all([x != GAP for x in t]) for t in zip(*align_seq.sequences)]

    if not any(core):
        raise RuntimeError(
            "Cannot generate core for models: {0}".format(models))

    # For each sequence, get a list of which positions are core
    core_positions = []
    for seq in align_seq.sequences:
        p = []
        count = 0
        for i, pos in enumerate(seq):
            if pos != GAP:
                if core[i]:
                    p.append(count)
                count += 1
        core_positions.append(p)

    # TODO: check that the sequence lengths match the number of amino acids in the pdbs

    # Create dict mapping seq_names to core positions
    core_dict = dict((s, core_positions[i]) for i, s in enumerate(seq_names))

    # Cut the models down to core
    core_models = []
    for m in models:
        name = os.path.basename(m)
        pdbout = ample_util.filename_append(m, astr='core', directory=work_dir)
        pdb_edit.select_residues(m, pdbout, tokeep_idx=core_dict[name])
        core_models.append(pdbout)

    return core_models
Example #11
    def findOrigin(self,
                   nativePdbInfo=None,
                   mrPdbInfo=None,
                   resSeqMap=None,
                   origins=None,
                   allAtom=False,
                   workdir=None):
        """Find the origin using the maximum number of contacts as metric"""
        
        # Default the working directory at call time, not at definition time
        self.workdir = workdir if workdir is not None else os.getcwd()
        if not resSeqMap.resSeqMatch():
            # We need to create a copy of the placed pdb with numbering matching the native
            mrPdbRes = ample_util.filename_append( filename=mrPdbInfo.pdb, astr="reseq", directory=self.workdir )
            pdb_edit.match_resseq( targetPdb=mrPdbInfo.pdb, sourcePdb=None, outPdb=mrPdbRes, resMap=resSeqMap )
            mrPdb = mrPdbRes
        else:
            mrPdb = mrPdbInfo.pdb
 
        # Make a copy of mrPdb with chains renamed to lower case
        ucChains = mrPdbInfo.models[0].chains
        toChains = [ c.lower() for c in ucChains ]
        placedAaPdb = ample_util.filename_append( filename=mrPdb, astr="ren", directory=self.workdir )
        pdb_edit.rename_chains( inpdb=mrPdb, outpdb=placedAaPdb, fromChain=ucChains, toChain=toChains )

        # The list of chains in the native that we will be checking contacts from
        fromChains = nativePdbInfo.models[0].chains
        
        # Loop over origins, move the placed pdb to the new origin and then run ncont
        # Object to hold data on best origin
        self.data = None
        for origin in origins:
            placedOriginPdb =  placedAaPdb
            if origin != [ 0.0, 0.0, 0.0 ]:
                # Move pdb to new origin
                #ostr="origin{0}".format(i)
                ostr="o{0}".format( origin ).replace(" ","" )
                placedOriginPdb = ample_util.filename_append( filename=placedAaPdb, astr=ostr, directory=self.workdir )
                pdb_edit.translate( inpdb=placedAaPdb, outpdb=placedOriginPdb, ftranslate=origin )
            
            # Concatenate into one file
            joinedPdb = ample_util.filename_append( filename=placedOriginPdb, astr="joined", directory=self.workdir )
            pdb_edit.merge( pdb1=nativePdbInfo.pdb, pdb2=placedOriginPdb, pdbout=joinedPdb )
            
            # Set up object to hold data
            data            = RioData()
            data.origin     = origin
            data.originPdb  = placedOriginPdb
            data.joinedPdb  = joinedPdb
            data.fromChains = fromChains
            data.toChains   = toChains
            data.numGood    = 0 # For holding the metric
            
            # Run ncont
            if allAtom:
                self.calcAllAtom( data )
                data.numGood = data.aaNumContacts
            else:
                self.calcRio( data )
                data.numGood = data.rioInRegister + data.rioOoRegister
            
            # Save the first origin and only update if we get a better score
            if not self.data or data.numGood > self.data.numGood:
                self.data = data

        # End loop over origins
        
        # Now need to calculate data for whichever one we didn't calculate
        if allAtom:
            self.calcRio( self.data )
        else:
            self.calcAllAtom( self.data )
        
        if self.data.numGood > 0:
            
            # If we got a match run csymmatch so we can see the result
            csym = csymmatch.Csymmatch()
            csymmatchPdb = ample_util.filename_append( filename=self.data.originPdb,
                                                       astr="csymmatch_best",
                                                       directory=self.workdir )
            csym.run( refPdb=nativePdbInfo.pdb,
                      inPdb=self.data.originPdb,
                      outPdb=csymmatchPdb,
                      originHand=False )
        
        return self.data
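
The best-origin bookkeeping in the loop above reduces to keeping the first candidate and replacing it whenever numGood improves. A toy illustration with made-up scores, not AMPLE code:

best = None
for num_good in [3, 7, 5]:
    candidate = {'numGood': num_good}
    if best is None or candidate['numGood'] > best['numGood']:
        best = candidate
# best['numGood'] == 7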
Example #12
    def generate_ensembles(self,
                           models,
                           ensembles_directory=None,
                           nproc=None,
                           percent_truncation=None,
                           percent_fixed_intervals=None,
                           side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                           truncation_method=None,
                           truncation_pruning=None,
                           truncation_scorefile=None,
                           truncation_scorefile_header=None):
        """Method to generate ensembles from a single structure based on 
        residue scores"""

        if not truncation_method:
            truncation_method = self.truncation_method
        if not truncation_pruning:
            truncation_pruning = self.truncation_pruning
        if not truncation_scorefile:
            truncation_scorefile = self.truncation_scorefile

        if len(models) > 1:
            msg = "More than 1 structure provided"
            logger.critical(msg)
            raise RuntimeError(msg)

        if len(truncation_scorefile_header) < 2:
            msg = "At least two header options for scorefile are required"
            logger.critical(msg)
            raise RuntimeError(msg)

        # standardise the structure
        std_models_dir = os.path.join(self.work_dir, "std_models")
        os.mkdir(std_models_dir)

        std_model = ample_util.filename_append(models[0], 'std',
                                               std_models_dir)
        pdb_edit.standardise(pdbin=models[0], pdbout=std_model,
                             del_hetatm=True)
        std_models = [std_model]
        logger.info('Standardised input model: %s', std_models[0])

        # Create final ensembles directory
        if not os.path.isdir(self.ensembles_directory):
            os.mkdir(self.ensembles_directory)

        truncate_dir = os.path.join(self.work_dir, "single_truncate")
        if not os.path.isdir(truncate_dir):
            os.mkdir(truncate_dir)

        # Read all the scores into a per residue dictionary
        assert len(truncation_scorefile_header) > 1, \
            "At least two column labels are required"
        residue_scores = self._read_scorefile(truncation_scorefile)
        residue_key = truncation_scorefile_header.pop(0)
        truncation_scorefile_header = [h.strip() for h in truncation_scorefile_header]
        assert all(h in residue_scores[0] for h in truncation_scorefile_header), \
            "Not all column labels are in your CSV file"
        self.ensembles = []
        for score_key in truncation_scorefile_header:
            zipped_scores = self._generate_residue_scorelist(residue_key,
                                                             score_key,
                                                             residue_scores)
            score_truncate_dir = os.path.join(truncate_dir,
                                              "{}".format(score_key))
            if not os.path.isdir(score_truncate_dir):
                os.mkdir(score_truncate_dir)

            self.truncator = truncation_util.Truncator(
                work_dir=score_truncate_dir)
            self.truncator.theseus_exe = self.theseus_exe
            for truncation in self.truncator.truncate_models(models=std_models,
                                                             truncation_method=truncation_method,
                                                             percent_truncation=percent_truncation,
                                                             percent_fixed_intervals=percent_fixed_intervals,
                                                             truncation_pruning=truncation_pruning,
                                                             residue_scores=zipped_scores):

                pre_ensemble = _ensembler.Ensemble()
                pre_ensemble.num_residues = truncation.num_residues
                pre_ensemble.truncation_dir = truncation.directory
                pre_ensemble.truncation_level = truncation.level
                pre_ensemble.truncation_method = truncation.method
                pre_ensemble.truncation_percent = truncation.percent
                pre_ensemble.truncation_residues = truncation.residues
                pre_ensemble.truncation_variance = truncation.variances
                pre_ensemble.truncation_score_key = score_key.lower()
                pre_ensemble.pdb = truncation.models[0]

                for ensemble in self.edit_side_chains(pre_ensemble,
                                                      side_chain_treatments,
                                                      single_structure=True):
                    self.ensembles.append(ensemble)

        return self.ensembles
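
The scorefile header handling above pops the residue-label column and strips the remaining score columns. A toy illustration with made-up column names, not AMPLE code:

header = ['residue', ' score_a ', 'score_b']
residue_key = header.pop(0)           # 'residue'
header = [h.strip() for h in header]  # ['score_a', 'score_b']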
Example #13
    def getRmsd( self, nativePdbInfo=None, placedPdbInfo=None, refModelPdbInfo=None, workdir=None, cAlphaOnly=True  ):
        """For now just save lowest rmsd - can look at collecting more nativeInfo later
        
        Currently we assume we are only given one model and that it has already been standardised.
        """

        if workdir:
            self.workdir = workdir
        if not self.workdir:
            self.workdir = os.getcwd()
        
        self.cAlphaOnly = cAlphaOnly  # Whether to only compare c-alpha atoms
        
        # Run a pass to find the # chains
        native_chains = nativePdbInfo.models[ 0 ].chains
        placed_chains = placedPdbInfo.models[ 0 ].chains
        
        #print "got native chains ", native_chains
        #print "got placed chains ", placed_chains
            
        rmsds = {} # dict of rmsd -> ( chainIDnative, chainIDrefined, reforiginLogfile )
        
        # Match each chain in native against refined and pick the best
        for nativeChainID in native_chains:
            
            #print "native_chain: {0}".format( nativeChainID )
                    
            if len( native_chains ) == 1:
                # Don't need to do anything as we are just using the native as is
                nativeChainPdb = nativePdbInfo.pdb
            else:
                # Extract the chain from the pdb
                astr = "chain{0}".format( nativeChainID )
                nativeChainPdb = ample_util.filename_append( filename=nativePdbInfo.pdb, astr=astr, directory=self.workdir )
                pdb_edit.extract_chain( nativePdbInfo.pdb, nativeChainPdb, chainID=nativeChainID )
                
            # Calculate the RefSeqMap - need to do this before we reduce to c-alphas
            # The second chain may be a different composition to the first, so we only generate a traceback if we fail
            # on the first chain. The model only has one chain, so the residueMap has to be the same for all the chains
            try:
                resSeqMap = residue_map.residueSequenceMap()
                resSeqMap.fromInfo( refInfo=nativePdbInfo,
                                    refChainID=nativeChainID,
                                    targetInfo=refModelPdbInfo,
                                    targetChainID='A' # Model only has one chain
                                    )
                
            except RuntimeError:
                if nativeChainID == native_chains[0]:
                    raise
                else:
                    # Only compare the first chain
                    break
                
            for placedChainID in placed_chains:
                
                # Prepare the placed PDB
                placedChainPdb = self.preparePlacedPdb( placedPdb=placedPdbInfo.pdb, placedChainID=placedChainID, nativeChainID=nativeChainID, resSeqMap=resSeqMap )
                
                # Now create a PDB with the matching atoms from native that are in refined
                nativePdbMatch = ample_util.filename_append( filename=nativeChainPdb, astr="matched", directory=self.workdir )
                pdb_edit.keep_matching( refpdb=placedChainPdb, targetpdb=nativeChainPdb, outpdb=nativePdbMatch, resSeqMap=resSeqMap )
                
                # Now get the rmsd
                astr = "chain{0}_reforigin".format( nativeChainID )
                reforiginOut = ample_util.filename_append( filename=placedChainPdb, astr=astr, directory=self.workdir )
                
                try:
                    rms = self.calculate( refpdb=nativePdbMatch, targetpdb=placedChainPdb, outpdb=reforiginOut )
                except RuntimeError as e:
                    logger.critical("GOT REFORIGIN ERROR for {0},{1},{2}\n{3}".format( placedChainPdb, nativeChainPdb, nativeChainID, e))
                    rms = 99999
                rmsds[ rms ] = ( nativeChainID, placedChainID, reforiginOut )
                # Clean up
                os.unlink(placedChainPdb)
                os.unlink(nativePdbMatch)
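
Per the docstring, only the lowest rmsd is kept. Because the rmsds dict is keyed by the rmsd value itself, the best match is simply the minimum key (two chain pairings with an identical rmsd would collide in this scheme). A toy illustration with made-up values:

rmsds = {1.8: ('A', 'a', 'chainA_reforigin.pdb'),
         0.9: ('B', 'a', 'chainB_reforigin.pdb')}
best_rmsd = min(rmsds)
native_chain, placed_chain, reforigin_pdb = rmsds[best_rmsd]
# best_rmsd == 0.9, from native chain 'B' matched against placed chain 'a'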
Example #14
    def scoreOrigin(self,
                    origin=None,
                    mrPdbInfo=None,
                    nativePdbInfo=None,
                    resSeqMap=None,
                    workdir=None):

        # Default the working directory at call time, not at definition time
        self.workdir = workdir if workdir is not None else os.getcwd()
        if not resSeqMap.resSeqMatch():
            # We need to create a copy of the placed pdb with numbering matching the native
            mrPdbRes = ample_util.filename_append(filename=mrPdbInfo.pdb,
                                                  astr="reseq",
                                                  directory=self.workdir)
            pdb_edit.match_resseq(targetPdb=mrPdbInfo.pdb,
                                  sourcePdb=None,
                                  outPdb=mrPdbRes,
                                  resMap=resSeqMap)
            mrPdb = mrPdbRes
        else:
            mrPdb = mrPdbInfo.pdb

        # Make a copy of mrPdb with chains renamed to lower case
        ucChains = mrPdbInfo.models[0].chains
        toChains = [c.lower() for c in ucChains]
        mrAaPdb = ample_util.filename_append(filename=mrPdb,
                                             astr="ren",
                                             directory=self.workdir)
        pdb_edit.rename_chains(inpdb=mrPdb,
                               outpdb=mrAaPdb,
                               fromChain=ucChains,
                               toChain=toChains)

        # The list of chains in the native that we will be checking contacts from
        fromChains = nativePdbInfo.models[0].chains

        mrOriginPdb = mrAaPdb
        if origin != [0.0, 0.0, 0.0]:
            # Move pdb to new origin
            # ostr="origin{0}".format(i)
            ostr = "o{0}".format(origin).replace(" ", "")
            mrOriginPdb = ample_util.filename_append(filename=mrAaPdb,
                                                     astr=ostr,
                                                     directory=self.workdir)
            pdb_edit.translate(inpdb=mrAaPdb,
                               outpdb=mrOriginPdb,
                               ftranslate=origin)

        # Concatenate into one file
        joinedPdb = ample_util.filename_append(filename=mrOriginPdb,
                                               astr="joined",
                                               directory=self.workdir)
        pdb_edit.merge(pdb1=nativePdbInfo.pdb,
                       pdb2=mrOriginPdb,
                       pdbout=joinedPdb)

        # Run ncont
        data = RioData()
        data.origin = origin
        data.originPdb = mrOriginPdb
        data.joinedPdb = joinedPdb
        data.fromChains = fromChains
        data.toChains = toChains

        # First get AllAtom score
        self.calcAllAtom(data)

        # Then score RIO
        self.calcRio(data)
        # data.numGood = data.inregister + data.ooregister

        # clean up
        os.unlink(mrOriginPdb)
        os.unlink(joinedPdb)
        if os.path.isfile(mrAaPdb):
            os.unlink(mrAaPdb)

        return data
Example #15
    def findOrigin(self,
                   nativePdbInfo=None,
                   mrPdbInfo=None,
                   resSeqMap=None,
                   origins=None,
                   allAtom=False,
                   workdir=None):
        """Find the origin using the maximum number of contacts as metric"""

        # Default the working directory at call time, not at definition time
        self.workdir = workdir if workdir is not None else os.getcwd()
        if not resSeqMap.resSeqMatch():
            # We need to create a copy of the placed pdb with numbering matching the native
            mrPdbRes = ample_util.filename_append(filename=mrPdbInfo.pdb,
                                                  astr="reseq",
                                                  directory=self.workdir)
            pdb_edit.match_resseq(targetPdb=mrPdbInfo.pdb,
                                  sourcePdb=None,
                                  outPdb=mrPdbRes,
                                  resMap=resSeqMap)
            mrPdb = mrPdbRes
        else:
            mrPdb = mrPdbInfo.pdb

        # Make a copy of mrPdb with chains renamed to lower case
        ucChains = mrPdbInfo.models[0].chains
        toChains = [c.lower() for c in ucChains]
        placedAaPdb = ample_util.filename_append(filename=mrPdb,
                                                 astr="ren",
                                                 directory=self.workdir)
        pdb_edit.rename_chains(inpdb=mrPdb,
                               outpdb=placedAaPdb,
                               fromChain=ucChains,
                               toChain=toChains)

        # The list of chains in the native that we will be checking contacts from
        fromChains = nativePdbInfo.models[0].chains

        # Loop over origins, move the placed pdb to the new origin and then run ncont
        # Object to hold data on best origin
        self.data = None
        for origin in origins:
            placedOriginPdb = placedAaPdb
            if origin != [0.0, 0.0, 0.0]:
                # Move pdb to new origin
                # ostr="origin{0}".format(i)
                ostr = "o{0}".format(origin).replace(" ", "")
                placedOriginPdb = ample_util.filename_append(
                    filename=placedAaPdb, astr=ostr, directory=self.workdir)
                pdb_edit.translate(inpdb=placedAaPdb,
                                   outpdb=placedOriginPdb,
                                   ftranslate=origin)

            # Concatenate into one file
            joinedPdb = ample_util.filename_append(filename=placedOriginPdb,
                                                   astr="joined",
                                                   directory=self.workdir)
            pdb_edit.merge(pdb1=nativePdbInfo.pdb,
                           pdb2=placedOriginPdb,
                           pdbout=joinedPdb)

            # Set up object to hold data
            data = RioData()
            data.origin = origin
            data.originPdb = placedOriginPdb
            data.joinedPdb = joinedPdb
            data.fromChains = fromChains
            data.toChains = toChains
            data.numGood = 0  # For holding the metric

            # Run ncont
            if allAtom:
                self.calcAllAtom(data)
                data.numGood = data.aaNumContacts
            else:
                self.calcRio(data)
                data.numGood = data.rioInRegister + data.rioOoRegister

            # Save the first origin and only update if we get a better score
            if not self.data or data.numGood > self.data.numGood:
                self.data = data

        # End loop over origins

        # Now need to calculate data for whichever one we didn't calculate
        if allAtom:
            self.calcRio(self.data)
        else:
            self.calcAllAtom(self.data)

        if self.data.numGood > 0:

            # If we got a match run csymmatch so we can see the result
            csym = csymmatch.Csymmatch()
            csymmatchPdb = ample_util.filename_append(
                filename=self.data.originPdb,
                astr="csymmatch_best",
                directory=self.workdir)
            csym.run(refPdb=nativePdbInfo.pdb,
                     inPdb=self.data.originPdb,
                     outPdb=csymmatchPdb,
                     originHand=False)

        return self.data
Example #16
def analyseSolution(amoptd, d, mrinfo):

    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb=None
    if d['MR_program']=="PHASER":
        mrPdb = d['PHASER_pdbout']
        mrMTZ = d['PHASER_mtzout']
    elif d['MR_program']=="MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program']=="unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        #logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb,d))
        return

    # debug - copy into work directory as reforigin struggles with long pathnames
    shutil.copy(mrPdb, os.path.join(fixpath(amoptd['benchmark_dir']), os.path.basename(mrPdb)))
    
    mrPdbInfo = pdb_edit.get_info( mrPdb )
    
    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()
    
    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical("mrPdb {0} has no SHELXE_os origin shift. Calculating...".format(mrPdb))
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            mrOrigin=[c*-1 for c in d['SHELXE_os']]
        
        # Move pdb onto new origin
        originPdb = ample_util.filename_append(mrPdb, astr='offset',directory=fixpath(amoptd['benchmark_dir']))
        #print(mrPdb, originPdb, mrOrigin)
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)
        
        # offset.pdb is the mrModel shifted onto the new origin use csymmatch to wrap onto native
        csymmatch.Csymmatch().wrapModelToNative(originPdb,
                                                amoptd['native_pdb'],
                                                csymmatchPdb=os.path.join(fixpath(amoptd['benchmark_dir']),
                                                "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])))
        # can now delete origin pdb
        os.unlink(originPdb)
        
        # Calculate phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb,e))         
    
        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full
        # initial model to compare to the native (we need it to determine which parts of the ensemble
        # correspond to which parts of the native structure), or where we were unable to calculate a res_seq_map
        if not (amoptd['homologs'] or \
                amoptd['ideal_helices'] or \
                amoptd['import_ensembles'] or \
                amoptd['single_model_mode'] or \
                amoptd['res_seq_map']):
    
            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(nativePdbInfo=amoptd['native_pdb_info'],
                               placedPdbInfo=mrPdbInfo,
                               refModelPdbInfo=amoptd['ref_model_pdb_info'],
                               cAlphaOnly=True,
                               workdir=fixpath(amoptd['benchmark_dir']))
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999
    
    
            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(mrOrigin,
                                          mrPdbInfo=mrPdbInfo,
                                          nativePdbInfo=amoptd['native_pdb_info'],
                                          resSeqMap=amoptd['res_seq_map'],
                                          workdir=fixpath(amoptd['benchmark_dir'])
                                          )
        
            # Set attributes
            d['AA_num_contacts']  = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register']  = rioData.rioInRegister
            d['RIO_oo_register']  = rioData.rioOoRegister
            d['RIO_backwards']    = rioData.rioBackwards
            d['RIO']              = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat']       = rioData.rioNumContacts - ( rioData.rioInRegister + rioData.rioOoRegister )
            d['RIO_norm']         = float(d['RIO']) / float(d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts']  = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register']  = None
            d['RIO_oo_register']  = None
            d['RIO_backwards']    = None
            d['RIO']              = None
            d['RIO_no_cat']       = None
            d['RIO_norm']         = None
    
    #     # Now get the helix
    #     helixSequence = contacts.Rio().helixFromContacts( contacts=rioData.contacts,
    #                                                            dsspLog=dsspLog )
    #     if helixSequence is not None:
    #         ampleResult.rioHelixSequence = helixSequence
    #         ampleResult.rioLenHelix      = len( helixSequence )
    #         hfile = os.path.join( workdir, "{0}.helix".format( ampleResult.ensembleName ) )
    #         with open( hfile, 'w' ) as f:
    #             f.write( helixSequence+"\n" )
    
        # This is purely for checking, and so that we have pdbs to view:
        # wrap the shelxe trace onto the native using csymmatch
        if d['SHELXE_pdbout'] is not None and os.path.isfile(fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative( fixpath(d['SHELXE_pdbout']),
                                                     amoptd['native_pdb'],
                                                     origin=mrOrigin,
                                                     workdir=fixpath(amoptd['benchmark_dir']))

        if not('SHELXE_wMPE' in d and d['SHELXE_wMPE']):
            try:
                mrinfo.analyse(d['SHELXE_pdbout'])
                d['SHELXE_MPE'] = mrinfo.MPE
                d['SHELXE_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SHELXE_pdbout: {0}\n{1}".format(d['SHELXE_pdbout'],e))    
                
        # Wrap the buccaneer model onto the native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(d['SXRBUCC_pdbout'])):
            # Need to rename the pdb as it is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']), "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))
    
            csymmatch.Csymmatch().wrapModelToNative( fixpath(d['SXRBUCC_pdbout']),
                                                     amoptd['native_pdb'],
                                                     origin=mrOrigin,
                                                     csymmatchPdb=csymmatchPdb,
                                                     workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRBUCC_pdbout: {0}\n{1}".format(d['SXRBUCC_pdbout'],e))
                            
        # Wrap the arpwarp model onto the native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Need to rename the pdb as it is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']), "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))
    
            csymmatch.Csymmatch().wrapModelToNative( fixpath(d['SXRARP_pdbout']),
                                                     amoptd['native_pdb'],
                                                     origin=mrOrigin,
                                                     csymmatchPdb=csymmatchPdb,
                                                     workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRARP_pdbout: {0}\n{1}".format(d['SXRARP_pdbout'],e))
    return
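
The sign flip applied to the stored SHELXE origin shift earlier in this function is easy to misread. A toy check with made-up values:

shelxe_os = [0.25, 0.0, -0.5]
mr_origin = [c * -1 for c in shelxe_os]
# mr_origin == [-0.25, -0.0, 0.5]: the model is translated by the opposite of
# the origin shift that SHELXE reported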
Example #17
def analysePdb(amoptd):
    """Collect data on the native pdb structure"""
    
    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)
    
    # number atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this spaceGroup
    try:
        originInfo = pdb_model.OriginInfo(spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        originInfo = None

    # Do this here as a bug in pdbcur can corrupt the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd['native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    if not originInfo:
        space_group = "P1"
    else:
        space_group = originInfo.spaceGroup()
    amoptd['native_pdb_space_group'] = space_group
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues
    
    # First check if the native has > 1 model and extract the first if so
    if len( nativePdbInfo.models ) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append( filename=nativePdb, astr="model1", directory=fixpath(amoptd['work_dir']))
        pdb_edit.extract_model( nativePdb, nativePdb1, modelID=nativePdbInfo.models[0].serial )
        nativePdb = nativePdb1
        
    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    nativePdbStd = ample_util.filename_append( filename=nativePdb, astr="std", directory=fixpath(amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd
    
    # Get the new Info about the native
    nativePdbInfo = pdb_edit.get_info( nativePdb )
    
    # For comparison of the shelxe model we need a single chain from the native, so we get this here
    if len( nativePdbInfo.models[0].chains ) > 1:
        nativeChain1  = ample_util.filename_append( filename=nativePdbInfo.pdb,
                                                       astr="chain1", 
                                                       directory=fixpath(amoptd['work_dir']))
        pdb_edit.merge_chains(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb
    
    # Additional data
    amoptd['native_pdb_num_chains'] = len( nativePdbInfo.models[0].chains )
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo
    
    return
Example #18
    def generate_ensembles(self,
                           models,
                           alignment_file=None,
                           ensembles_directory=None,
                           homolog_aligner=None,
                           nproc=None,
                           percent_truncation=None,
                           side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                           truncation_method=None):

        if not percent_truncation:
            percent_truncation = self.percent_truncation
        if not truncation_method:
            truncation_method = self.truncation_method

        if not models:
            msg = "Cannot find any models for ensembling!"
            raise RuntimeError(msg)
        if not all([os.path.isfile(m) for m in models]):
            msg = "Problem reading models given to Ensembler: {0}".format(
                models)
            raise RuntimeError(msg)

        logger.info('Ensembling models in directory: %s', self.work_dir)

        # Create final ensembles directory
        if not os.path.isdir(self.ensembles_directory):
            os.mkdir(self.ensembles_directory)

        # standardise all the models
        std_models_dir = os.path.join(self.work_dir, "std_models")
        os.mkdir(std_models_dir)
        std_models = []
        for m in models:
            std_model = ample_util.filename_append(m, 'std', std_models_dir)
            pdb_edit.standardise(pdbin=m, pdbout=std_model, del_hetatm=True)
            std_models.append(std_model)

        # Get a structural alignment between the different models
        if not alignment_file:
            if homolog_aligner == 'mustang':
                logger.info("Generating alignment file with mustang_exe: %s",
                            self.mustang_exe)
                alignment_file = align_mustang(std_models,
                                               mustang_exe=self.mustang_exe,
                                               work_dir=self.work_dir)
            elif homolog_aligner == 'gesamt':
                logger.info("Generating alignment file with gesamt_exe: %s",
                            self.gesamt_exe)
                alignment_file = align_gesamt(std_models,
                                              gesamt_exe=self.gesamt_exe,
                                              work_dir=self.work_dir)
            else:
                msg = "Unknown homolog_aligner: {0}".format(homolog_aligner)
                raise RuntimeError(msg)
            logger.info("Generated alignment file: %s", alignment_file)
        else:
            logger.info("Using alignment file: %s", alignment_file)

        truncate_dir = os.path.join(self.work_dir, "homolog_truncate")
        if not os.path.isdir(truncate_dir):
            os.mkdir(truncate_dir)

        # Now truncate and create ensembles - as for standard AMPLE, but with no subclustering
        self.ensembles = []
        self.truncator = truncation_util.Truncator(work_dir=truncate_dir)
        self.truncator.theseus_exe = self.theseus_exe
        for truncation in self.truncator.truncate_models(
                models=std_models,
                truncation_method=truncation_method,
                percent_truncation=percent_truncation,
                truncation_pruning=None,
                homologs=True,
                alignment_file=alignment_file):
            ensemble_dir = os.path.join(
                truncation.directory, "ensemble_{0}".format(truncation.level))
            os.mkdir(ensemble_dir)
            os.chdir(ensemble_dir)

            # Need to create an alignment file for theseus
            basename = "e{0}".format(truncation.level)
            superposed_models = self.superpose_models(truncation.models,
                                                      basename=basename,
                                                      work_dir=ensemble_dir,
                                                      homologs=True)
            if not superposed_models:
                logger.critical(
                    "Skipping ensemble %s due to error with Theseus", basename)
                continue

            # Create Ensemble object
            pre_ensemble = _ensembler.Ensemble()
            pre_ensemble.num_residues = truncation.num_residues
            pre_ensemble.truncation_dir = truncation.directory
            pre_ensemble.truncation_level = truncation.level
            pre_ensemble.truncation_method = truncation.method
            pre_ensemble.truncation_percent = truncation.percent
            pre_ensemble.truncation_residues = truncation.residues
            pre_ensemble.truncation_variance = truncation.variances
            pre_ensemble.pdb = superposed_models

            for ensemble in self.edit_side_chains(pre_ensemble,
                                                  side_chain_treatments,
                                                  homologs=True):
                self.ensembles.append(ensemble)

        return self.ensembles
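
A hedged driver sketch for the method above; only the keyword arguments come from the signature, while the concrete Ensembler construction and the model paths are assumptions for illustration:

# Hypothetical usage -- the homologs Ensembler subclass and its configured
# attributes (work_dir, theseus_exe, mustang_exe/gesamt_exe) are assumed here.
ensembler = Ensembler()  # hypothetical construction
ensembles = ensembler.generate_ensembles(
    models=["homolog_1.pdb", "homolog_2.pdb", "homolog_3.pdb"],  # illustrative paths
    alignment_file=None,          # None forces a fresh structural alignment
    homolog_aligner="gesamt",     # "mustang" also accepted; anything else raises RuntimeError
    percent_truncation=5,         # illustrative; defaults to self.percent_truncation
    truncation_method="percent",  # illustrative; defaults to self.truncation_method
)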
Beispiel #19
0
def analyseSolution(amoptd, d, mrinfo):

    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb = None
    if d['MR_program'] == "PHASER":
        mrPdb = d['PHASER_pdbout']
        mrMTZ = d['PHASER_mtzout']
    elif d['MR_program'] == "MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program'] == "unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        # logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb,d))
        return

    # debug - copy into work directory as reforigin struggles with long pathnames
    shutil.copy(
        mrPdb,
        os.path.join(fixpath(amoptd['benchmark_dir']),
                     os.path.basename(mrPdb)))

    mrPdbInfo = pdb_edit.get_info(mrPdb)

    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()

    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical("mrPdb %s has no SHELXE_os origin shift. Calculating...", mrPdb)
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            mrOrigin = [c * -1 for c in d['SHELXE_os']]

        # Move pdb onto new origin
        originPdb = ample_util.filename_append(mrPdb,
                                               astr='offset',
                                               directory=fixpath(
                                                   amoptd['benchmark_dir']))
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)

        # The offset pdb is the MR model shifted onto the new origin; use csymmatch to wrap it onto the native
        csymmatch.Csymmatch().wrapModelToNative(
            originPdb,
            amoptd['native_pdb'],
            csymmatchPdb=os.path.join(
                fixpath(amoptd['benchmark_dir']),
                "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])),
        )
        # Can now delete the origin-shifted pdb
        os.unlink(originPdb)

        # Calculate phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb, e))

        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full initial model
        # to compare to the native (needed to determine which parts of the ensemble correspond to which parts of
        # the native structure), or where we were unable to calculate a res_seq_map
        if not (amoptd['homologs'] or amoptd['ideal_helices']
                or amoptd['import_ensembles']
                or amoptd['single_model_mode']) and amoptd['res_seq_map']:

            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(
                    nativePdbInfo=amoptd['native_pdb_info'],
                    placedPdbInfo=mrPdbInfo,
                    refModelPdbInfo=amoptd['ref_model_pdb_info'],
                    cAlphaOnly=True,
                    workdir=fixpath(amoptd['benchmark_dir']),
                )
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999

            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(
                mrOrigin,
                mrPdbInfo=mrPdbInfo,
                nativePdbInfo=amoptd['native_pdb_info'],
                resSeqMap=amoptd['res_seq_map'],
                workdir=fixpath(amoptd['benchmark_dir']),
            )

            # Set attributes
            d['AA_num_contacts'] = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register'] = rioData.rioInRegister
            d['RIO_oo_register'] = rioData.rioOoRegister
            d['RIO_backwards'] = rioData.rioBackwards
            d['RIO'] = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat'] = rioData.rioNumContacts - (rioData.rioInRegister +
                                                        rioData.rioOoRegister)
            d['RIO_norm'] = float(d['RIO']) / float(
                d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts'] = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register'] = None
            d['RIO_oo_register'] = None
            d['RIO_backwards'] = None
            d['RIO'] = None
            d['RIO_no_cat'] = None
            d['RIO_norm'] = None

        #     # Now get the helix
        #     helixSequence = contacts.Rio().helixFromContacts( contacts=rioData.contacts,
        #                                                            dsspLog=dsspLog )
        #     if helixSequence is not None:
        #         ampleResult.rioHelixSequence = helixSequence
        #         ampleResult.rioLenHelix      = len( helixSequence )
        #         hfile = os.path.join( workdir, "{0}.helix".format( ampleResult.ensembleName ) )
        #         with open( hfile, 'w' ) as f:
        #             f.write( helixSequence+"\n" )

        #
        # This purely for checking and so we have pdbs to view
        #
        # Wrap shelxe trace onto native using Csymmatch
        if d['SHELXE_pdbout'] is not None and os.path.isfile(fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative(
                fixpath(d['SHELXE_pdbout']),
                amoptd['native_pdb'],
                origin=mrOrigin,
                workdir=fixpath(amoptd['benchmark_dir']),
            )

        if d['SHELXE_pdbout'] and not d.get('SHELXE_wMPE'):
            try:
                mrinfo.analyse(d['SHELXE_pdbout'])
                d['SHELXE_MPE'] = mrinfo.MPE
                d['SHELXE_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical(
                    "Error analysing SHELXE_pdbout: {0}\n{1}".format(
                        d['SHELXE_pdbout'], e))

        # Wrap the SHELXE-rebuilt Buccaneer model onto the native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(d['SXRBUCC_pdbout'])):
            # Need to rename the pdb as it is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(
                fixpath(amoptd['benchmark_dir']),
                "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(
                fixpath(d['SXRBUCC_pdbout']),
                amoptd['native_pdb'],
                origin=mrOrigin,
                csymmatchPdb=csymmatchPdb,
                workdir=fixpath(amoptd['benchmark_dir']),
            )
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical(
                    "Error analysing SXRBUCC_pdbout: {0}\n{1}".format(
                        d['SXRBUCC_pdbout'], e))

        # Wrap the SHELXE-rebuilt ARP/wARP model onto the native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Need to rename the pdb as the ARP/wARP output also has a generic name
            csymmatchPdb = os.path.join(
                fixpath(amoptd['benchmark_dir']),
                "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(
                fixpath(d['SXRARP_pdbout']),
                amoptd['native_pdb'],
                origin=mrOrigin,
                csymmatchPdb=csymmatchPdb,
                workdir=fixpath(amoptd['benchmark_dir']),
            )
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical(
                    "Error analysing SXRARP_pdbout: {0}\n{1}".format(
                        d['SXRARP_pdbout'], e))
    return
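
A sketch of the call pattern implied by analyseSolution; the dictionary keys are taken from the body above, while the container and the mrinfo helper are assumptions:

# Hypothetical driver loop: amoptd is the main AMPLE options dictionary and
# each d holds one MR result; mrinfo is assumed to be the phase-error helper
# whose analyse()/originShift/MPE/wMPE members are used in analyseSolution.
for d in mrbump_results:  # illustrative name for the per-ensemble results
    analyseSolution(amoptd, d, mrinfo)
    logger.info("RIO for %s: %s", d['ensemble_name'], d.get('RIO'))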