コード例 #1
0
    def mk_native_files(self, native_pdb, native_mtz):
        """Write the SHELXE input files derived from the native structure.

        Two files named after ``self.stem`` are placed in ``self.work_dir``:
        ``<stem>.hkl`` (produced by ``mtz_util.to_hkl`` from the native MTZ)
        and ``<stem>.ent`` (a plain copy of the native PDB).

        Parameters
        ----------
        native_pdb : str
          Path to the native PDB file
        native_mtz : str
          Path to the native MTZ file

        """
        # Reflection data: <work_dir>/<stem>.hkl
        hkl_path = os.path.join(self.work_dir, self.stem + ".hkl")
        mtz_util.to_hkl(native_mtz, hkl_file=hkl_path)

        # Native coordinates: <work_dir>/<stem>.ent
        ent_path = os.path.join(self.work_dir, self.stem + ".ent")
        shutil.copyfile(native_pdb, ent_path)
コード例 #2
0
ファイル: shelxe.py プロジェクト: tiger-tiger/ample
    def mk_native_files(self, native_pdb, native_mtz):
        """Generate the native-structure files that SHELXE needs.

        The native MTZ is passed to ``mtz_util.to_hkl`` to write
        ``<stem>.hkl`` and the native PDB is copied to ``<stem>.ent``; both
        outputs live in ``self.work_dir``.

        Parameters
        ----------
        native_pdb : str
          Path to the native PDB file
        native_mtz : str
          Path to the native MTZ file

        """
        hkl_name = self.stem + ".hkl"
        mtz_util.to_hkl(native_mtz, hkl_file=os.path.join(self.work_dir, hkl_name))

        ent_name = self.stem + ".ent"
        shutil.copyfile(native_pdb, os.path.join(self.work_dir, ent_name))
コード例 #3
0
ファイル: benchmark_util.py プロジェクト: tiger-tiger/ample
def analyse(amoptd, newroot=None):
    """Collate the benchmark data for a run and write it to ``results.csv``.

    The function changes the working directory to the benchmark directory
    (creating it first if it does not exist), merges every MRBUMP result with
    the data of the ensemble it was generated from, optionally analyses each
    solution against the native structure and finally stores the table both
    as ``results.csv`` in the benchmark directory and as a list of records in
    ``amoptd['benchmark_results']``.

    Parameters
    ----------
    amoptd : dict
      AMPLE options/results dictionary; it is read and updated in place
    newroot : str, optional
      Existing directory. When given, the module-level ``_newroot`` is set to
      it and ``_oldroot`` to ``amoptd['work_dir']`` before any path is passed
      through ``fixpath`` (presumably so that ``fixpath`` can relocate paths
      -- confirm against ``fixpath``)

    Returns
    -------
    None
      The function also returns early, after logging a critical message,
      when no ensemble data or no MRBUMP results are available.

    Raises
    ------
    RuntimeError
      If TM-score is not available and none of ``amoptd['models']`` matches
      a subcluster centroid model

    """
    if newroot:
        assert os.path.isdir(newroot)
        global _oldroot, _newroot
        _newroot = newroot
        _oldroot = amoptd['work_dir']

    if not os.path.isdir(fixpath(amoptd['benchmark_dir'])):
        os.mkdir(fixpath(amoptd['benchmark_dir']))
    os.chdir(fixpath(amoptd['benchmark_dir']))

    # AnalysePdb may have already been called from the main script
    if amoptd['native_pdb'] and 'native_pdb_std' not in amoptd:
        analysePdb(amoptd)

    # .get() because the key is absent when no native PDB was supplied and
    # analysePdb was therefore never run
    if amoptd.get('native_pdb_std'):
        # Generate an SHELXE HKL and ENT file so that we can calculate phase errors
        # NOTE(review): these paths are not passed through fixpath, unlike the
        # mkdir/chdir above -- confirm this is intended when newroot is set
        mtz_util.to_hkl(amoptd['mtz'],
                        hkl_file=os.path.join(amoptd['benchmark_dir'],
                                              SHELXE_STEM + ".hkl"))
        shutil.copyfile(
            amoptd['native_pdb_std'],
            os.path.join(amoptd['benchmark_dir'], SHELXE_STEM + ".ent"))

    if amoptd['native_pdb'] and \
       not (amoptd['homologs'] or amoptd['ideal_helices']
            or amoptd['import_ensembles'] or amoptd['single_model_mode']):
        analyseModels(amoptd)

    # Get the ensembling data (missing key, None and empty are all "no data")
    if not amoptd.get('ensembles_data'):
        logger.critical("Benchmark cannot find any ensemble data!")
        return

    # Get dict of ensemble name -> ensemble result
    ensemble_results = {e['name']: e for e in amoptd['ensembles_data']}

    # Get mrbump_results for cluster
    if not amoptd.get('mrbump_results'):
        logger.critical("Benchmark cannot find any mrbump results!")
        return

    data = []
    mrinfo = shelxe.MRinfo(amoptd['shelxe_exe'], amoptd['native_pdb_info'].pdb,
                           amoptd['mtz'])
    for result in amoptd['mrbump_results']:

        # use mrbump dict as basis for result object (shallow copy, so the
        # entry in amoptd['mrbump_results'] does not gain the keys added below)
        d = copy.copy(result)

        # Add in the data from the ensemble
        d.update(ensemble_results[d['ensemble_name']])
        assert d['ensemble_name'] == d['name'], d

        # Hack for old results
        if 'truncation_num_residues' in d:
            d['num_residues'] = d['truncation_num_residues']
            del d['truncation_num_residues']

        # Hack for ideal helices where num_residues are missing.
        # str.lstrip removes any leading characters from the set 'polyala'
        # (not the literal prefix), which leaves the trailing number as long
        # as the name looks like 'polyala<N>' -- TODO confirm naming scheme
        if amoptd['ideal_helices'] and ('num_residues' not in d
                                        or d['num_residues'] is None):
            d['num_residues'] = int(d['ensemble_name'].lstrip('polyala'))

        # Get the ensemble data and add to the MRBUMP data
        # (percentage of the target sequence covered, truncated to an int)
        d['ensemble_percent_model'] = int(
            (float(d['num_residues']) / float(amoptd['fasta_length'])) * 100)

        if amoptd['native_pdb']:
            # Add in stuff we've cleaned from the pdb
            native_keys = [
                'native_pdb_code', 'native_pdb_title', 'native_pdb_resolution',
                'native_pdb_solvent_content', 'native_pdb_space_group',
                'native_pdb_num_chains', 'native_pdb_num_atoms',
                'native_pdb_num_residues'
            ]
            d.update({key: amoptd[key] for key in native_keys})
            # Analyse the solution
            analyseSolution(amoptd, d, mrinfo)
        data.append(d)

    # Put everything in a pandas DataFrame
    dframe = pd.DataFrame(data)

    # General stuff
    dframe['ample_version'] = amoptd['ample_version']
    dframe['fasta_length'] = amoptd['fasta_length']

    # Analyse subcluster centroid models
    if 'subcluster_centroid_model' in dframe.columns and amoptd['native_pdb']:
        centroid_index = dframe.index
        centroid_models = [
            fixpath(f) for f in dframe.subcluster_centroid_model
        ]
        native_pdb_std = fixpath(amoptd['native_pdb_std'])
        fasta = fixpath(amoptd['fasta'])

        # Calculation of TMscores for subcluster centroid models
        if amoptd['have_tmscore']:
            tm = tm_util.TMscore(amoptd['tmscore_exe'],
                                 wdir=fixpath(amoptd['benchmark_dir']),
                                 **amoptd)
            tm_results = tm.compare_structures(centroid_models,
                                               [native_pdb_std], [fasta])
            centroid_tmscores = [r['tmscore'] for r in tm_results]
            centroid_rmsds = [r['rmsd'] for r in tm_results]

        else:
            # Use maxcluster
            centroid_tmscores = []
            for centroid_model in centroid_models:
                # Match the centroid to the first model whose file stem is a
                # prefix of the centroid's file stem
                n = os.path.splitext(os.path.basename(centroid_model))[0]
                cm = None
                for pdb in amoptd['models']:
                    if n.startswith(
                            os.path.splitext(os.path.basename(pdb))[0]):
                        cm = pdb
                        break
                if cm:
                    centroid_tmscores.append(_MAXCLUSTERER.tm(cm))
                else:
                    # Report the centroid that could not be matched. Indexing
                    # the frame with a (row, column) tuple would be treated
                    # as a column label and raise KeyError instead.
                    msg = "Cannot find model for subcluster_centroid_model {0}".format(
                        centroid_model)
                    raise RuntimeError(msg)

            # There is an issue here as this is the RMSD over the TM-aligned residues
            # NOT the global RMSD, which it is for the tm_util results
            centroid_rmsds = [None for _ in centroid_index]

        dframe['subcluster_centroid_model_TM'] = pd.Series(
            centroid_tmscores, index=centroid_index)
        dframe['subcluster_centroid_model_RMSD'] = pd.Series(
            centroid_rmsds, index=centroid_index)

    # Save the data
    file_name = os.path.join(fixpath(amoptd['benchmark_dir']), 'results.csv')
    dframe.to_csv(file_name, columns=_CSV_KEYLIST, index=False, na_rep="N/A")
    amoptd['benchmark_results'] = dframe.to_dict('records')

    return
コード例 #4
0
def analyse(amoptd, newroot=None):
    """Gather the benchmark results of a run and save them as ``results.csv``.

    The working directory is switched to the benchmark directory (created on
    demand). Each MRBUMP result is combined with the data of its ensemble,
    analysed against the native structure when one is available, and the
    resulting table is written to ``results.csv`` and stored as a list of
    records in ``amoptd['benchmark_results']``.

    Parameters
    ----------
    amoptd : dict
      AMPLE options/results dictionary; it is read and updated in place
    newroot : str, optional
      Existing directory. When given, the module-level ``_newroot`` is set to
      it and ``_oldroot`` to ``amoptd['work_dir']`` before any path is passed
      through ``fixpath`` (presumably so that ``fixpath`` can relocate paths
      -- confirm against ``fixpath``)

    Returns
    -------
    None
      The function also returns early, after logging a critical message,
      when no ensemble data or no MRBUMP results are available.

    Raises
    ------
    RuntimeError
      If subcluster centroid models need scoring but
      ``amoptd['have_tmscore']`` is false

    """
    if newroot:
        assert os.path.isdir(newroot)
        global _oldroot, _newroot
        _newroot = newroot
        _oldroot = amoptd['work_dir']

    if not os.path.isdir(fixpath(amoptd['benchmark_dir'])):
        os.mkdir(fixpath(amoptd['benchmark_dir']))
    os.chdir(fixpath(amoptd['benchmark_dir']))

    # AnalysePdb may have already been called from the main script
    if amoptd['native_pdb'] and 'native_pdb_std' not in amoptd:
        analysePdb(amoptd)

    # .get() because the key is absent when no native PDB was supplied and
    # analysePdb was therefore never run
    if amoptd.get('native_pdb_std'):
        # Generate an SHELXE HKL and ENT file so that we can calculate phase errors
        # NOTE(review): these paths are not passed through fixpath, unlike the
        # mkdir/chdir above -- confirm this is intended when newroot is set
        mtz_util.to_hkl(amoptd['mtz'], hkl_file=os.path.join(amoptd['benchmark_dir'], SHELXE_STEM + ".hkl"))
        shutil.copyfile(amoptd['native_pdb_std'], os.path.join(amoptd['benchmark_dir'], SHELXE_STEM + ".ent"))

    if amoptd['native_pdb'] and \
       not (amoptd['homologs'] or amoptd['ideal_helices']
            or amoptd['import_ensembles'] or amoptd['single_model_mode']):
        analyseModels(amoptd)

    # Get the ensembling data (missing key, None and empty are all "no data")
    if not amoptd.get('ensembles_data'):
        logger.critical("Benchmark cannot find any ensemble data!")
        return

    # Get dict of ensemble name -> ensemble result
    ensemble_results = {e['name']: e for e in amoptd['ensembles_data']}

    # Get mrbump_results for cluster
    if not amoptd.get('mrbump_results'):
        logger.critical("Benchmark cannot find any mrbump results!")
        return

    data = []
    mrinfo = shelxe.MRinfo(amoptd['shelxe_exe'], amoptd['native_pdb_info'].pdb, amoptd['mtz'])
    for result in amoptd['mrbump_results']:

        # use mrbump dict as basis for result object (shallow copy, so the
        # entry in amoptd['mrbump_results'] does not gain the keys added below)
        d = copy.copy(result)

        # Add in the data from the ensemble
        d.update(ensemble_results[d['ensemble_name']])
        assert d['ensemble_name'] == d['name'], d

        # Hack for old results
        if 'truncation_num_residues' in d:
            d['num_residues'] = d['truncation_num_residues']
            del d['truncation_num_residues']

        # Hack for ideal helices where num_residues are missing.
        # str.lstrip removes any leading characters from the set 'polyala'
        # (not the literal prefix), which leaves the trailing number as long
        # as the name looks like 'polyala<N>' -- TODO confirm naming scheme
        if amoptd['ideal_helices'] and ('num_residues' not in d or d['num_residues'] is None):
            d['num_residues'] = int(d['ensemble_name'].lstrip('polyala'))

        # Get the ensemble data and add to the MRBUMP data
        # (percentage of the target sequence covered, truncated to an int)
        d['ensemble_percent_model'] = int((float(d['num_residues']) / float(amoptd['fasta_length'])) * 100)

        if amoptd['native_pdb']:
            # Add in stuff we've cleaned from the pdb
            native_keys = [
                'native_pdb_code', 'native_pdb_title', 'native_pdb_resolution', 'native_pdb_solvent_content',
                'native_pdb_space_group', 'native_pdb_num_chains', 'native_pdb_num_atoms', 'native_pdb_num_residues'
            ]
            d.update({key: amoptd[key] for key in native_keys})
            # Analyse the solution
            analyseSolution(amoptd, d, mrinfo)
        data.append(d)

    # Put everything in a pandas DataFrame
    dframe = pd.DataFrame(data)

    # General stuff
    dframe['ample_version'] = amoptd['ample_version']
    dframe['fasta_length'] = amoptd['fasta_length']

    # Analyse subcluster centroid models
    if 'subcluster_centroid_model' in dframe.columns and amoptd['native_pdb']:
        centroid_index = dframe.index
        centroid_models = [fixpath(f) for f in dframe.subcluster_centroid_model]
        native_pdb_std = fixpath(amoptd['native_pdb_std'])
        fasta = fixpath(amoptd['fasta'])

        # Calculation of TMscores for subcluster centroid models
        if amoptd['have_tmscore']:
            tm = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']), **amoptd)
            tm_results = tm.compare_structures(centroid_models, [native_pdb_std], [fasta])
            centroid_tmscores = [r['tmscore'] for r in tm_results]
            centroid_rmsds = [r['rmsd'] for r in tm_results]
        else:
            raise RuntimeError("No program to calculate tmscores!")

        # Re-attach the scores to the frame using the frame's own index
        dframe['subcluster_centroid_model_TM'] = pd.Series(centroid_tmscores, index=centroid_index)
        dframe['subcluster_centroid_model_RMSD'] = pd.Series(centroid_rmsds, index=centroid_index)

    # Save the data
    file_name = os.path.join(fixpath(amoptd['benchmark_dir']), 'results.csv')
    dframe.to_csv(file_name, columns=_CSV_KEYLIST, index=False, na_rep="N/A")
    amoptd['benchmark_results'] = dframe.to_dict('records')

    return