Beispiel #1
0
def molprobity_validation(targetid, ensembler_stage=None, loglevel=None):
    """Score the valid models of a target with MolProbity, split across MPI ranks.

    Rank 0 resolves the Ensembler stage (when none is given) and the list of
    valid model IDs; both are then broadcast to every rank. The models are
    divided between ranks with a stride of ``mpistate.size``, and each rank
    calls ``run_molprobity_oneline_analysis_and_write_results`` for its own
    models. The per-rank ``(model_id, score)`` lists are gathered on rank 0,
    flattened, sorted by ascending score and handed to
    ``write_molprobity_scores_list``.

    Parameters
    ----------
    targetid : str
        Target identifier; also the name of the target's subdirectory under
        ``default_project_dirnames.models``.
    ensembler_stage : str, optional
        Key into ``model_filenames_by_ensembler_stage``.
        default: the value returned on rank 0 by
        ``get_most_advanced_ensembler_modeling_stage(targetid)``
    loglevel : optional
        Passed unchanged to ``set_loglevel``.

    Notes
    -----
    The sorted scores are written (by rank 0 only) to
    ``<models dir>/<targetid>/validation_scores_sorted-molprobity-<ensembler_stage>``.
    """
    set_loglevel(loglevel)
    valid_model_ids = []
    if mpistate.rank == 0:
        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(
                targetid)
        valid_model_ids = get_valid_model_ids(ensembler_stage, targetid)

    # bcast is a collective operation, so every rank has to enter it. Guarding
    # it with ``if ensembler_stage is None`` made rank 0 (which has already
    # resolved the stage) skip the call while the other ranks entered it,
    # pairing their stage broadcast with rank 0's model-ID broadcast. When the
    # caller supplied the stage explicitly, broadcasting it is simply a no-op.
    ensembler_stage = mpistate.comm.bcast(ensembler_stage, root=0)
    valid_model_ids = mpistate.comm.bcast(valid_model_ids, root=0)
    nvalid_model_ids = len(valid_model_ids)
    model_structure_filename = model_filenames_by_ensembler_stage[
        ensembler_stage]

    models_target_dir = os.path.join(default_project_dirnames.models, targetid)
    molprobity_results_filepath = os.path.join(
        models_target_dir,
        'validation_scores_sorted-molprobity-{}'.format(ensembler_stage))

    # Each rank handles model indices rank, rank + size, rank + 2*size, ...
    molprobity_scores_sublist = []
    for model_index in range(mpistate.rank, nvalid_model_ids, mpistate.size):
        model_id = valid_model_ids[model_index]

        logger.debug('MPI process {} working on model {}'.format(
            mpistate.rank, model_id))

        molprobity_score = run_molprobity_oneline_analysis_and_write_results(
            targetid,
            model_id,
            ensembler_stage,
            model_structure_filename=model_structure_filename,
            models_target_dir=models_target_dir,
        )

        molprobity_scores_sublist.append((model_id, molprobity_score))

    # gather returns the list of per-rank sublists on the root rank only.
    molprobity_scores_gathered_list = mpistate.comm.gather(
        molprobity_scores_sublist, root=0)
    if mpistate.rank == 0:
        molprobity_scores_list_of_tuples = [
            item for sublist in molprobity_scores_gathered_list
            for item in sublist
        ]
        molprobity_scores_sorted = sorted(molprobity_scores_list_of_tuples,
                                          key=lambda x: x[1])
        write_molprobity_scores_list(molprobity_scores_sorted,
                                     molprobity_results_filepath)
Beispiel #2
0
def molprobity_validation(targetid, ensembler_stage=None, loglevel=None):
    """Run the MolProbity analysis over all valid models of a target using MPI.

    The root rank (0) determines the Ensembler stage, unless one is passed in,
    and collects the valid model IDs. Both values are broadcast so that all
    ranks agree on them. Work is shared out by model index with a stride of
    ``mpistate.size``; every rank calls
    ``run_molprobity_oneline_analysis_and_write_results`` once per assigned
    model and records ``(model_id, score)``. The root rank gathers all
    records, sorts them by ascending score and passes them to
    ``write_molprobity_scores_list``.

    Parameters
    ----------
    targetid : str
        Target identifier; used as the subdirectory name under
        ``default_project_dirnames.models``.
    ensembler_stage : str, optional
        Key into ``model_filenames_by_ensembler_stage``.
        default: result of ``get_most_advanced_ensembler_modeling_stage(targetid)``
        evaluated on rank 0
    loglevel : optional
        Forwarded as-is to ``set_loglevel``.

    Notes
    -----
    Output path (written on rank 0 only):
    ``<models dir>/<targetid>/validation_scores_sorted-molprobity-<ensembler_stage>``.
    """
    set_loglevel(loglevel)
    valid_model_ids = []
    if mpistate.rank == 0:
        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(targetid)
        valid_model_ids = get_valid_model_ids(ensembler_stage, targetid)

    # Both broadcasts are collective calls and must be made by every rank.
    # The stage broadcast used to sit behind ``if ensembler_stage is None``,
    # which is false on rank 0 once it has resolved the stage but true on all
    # other ranks; the ranks then disagreed on the sequence of collectives.
    # Broadcasting unconditionally is harmless when the stage was passed in.
    ensembler_stage = mpistate.comm.bcast(ensembler_stage, root=0)
    valid_model_ids = mpistate.comm.bcast(valid_model_ids, root=0)
    nvalid_model_ids = len(valid_model_ids)
    model_structure_filename = model_filenames_by_ensembler_stage[ensembler_stage]

    models_target_dir = os.path.join(default_project_dirnames.models, targetid)
    molprobity_results_filepath = os.path.join(
        models_target_dir, "validation_scores_sorted-molprobity-{}".format(ensembler_stage)
    )

    # Strided assignment: this rank takes indices rank, rank + size, ...
    molprobity_scores_sublist = []
    for model_index in range(mpistate.rank, nvalid_model_ids, mpistate.size):
        model_id = valid_model_ids[model_index]

        logger.debug("MPI process {} working on model {}".format(mpistate.rank, model_id))

        molprobity_score = run_molprobity_oneline_analysis_and_write_results(
            targetid,
            model_id,
            ensembler_stage,
            model_structure_filename=model_structure_filename,
            models_target_dir=models_target_dir,
        )

        molprobity_scores_sublist.append((model_id, molprobity_score))

    # Only the root rank receives the gathered list of per-rank sublists.
    molprobity_scores_gathered_list = mpistate.comm.gather(molprobity_scores_sublist, root=0)
    if mpistate.rank == 0:
        molprobity_scores_list_of_tuples = [item for sublist in molprobity_scores_gathered_list for item in sublist]
        molprobity_scores_sorted = sorted(molprobity_scores_list_of_tuples, key=lambda x: x[1])
        write_molprobity_scores_list(molprobity_scores_sorted, molprobity_results_filepath)
Beispiel #3
0
    def __init__(self,
                 targetid,
                 ensembler_stage=None,
                 traj_filepath=None,
                 topol_filepath=None,
                 models_data_filepath=None,
                 process_only_these_templates=None,
                 loglevel=None,
                 run_main=True):
        """Makes a trajectory for a given target, using mdtraj. The trajectory can be used with other
        software, e.g. for visualization with PyMOL or VMD.

        Parameters
        ----------
        targetid : str
            e.g. 'EGFR_HUMAN_D0'
        ensembler_stage : str
            The Ensembler stage from which to build models, e.g. 'build_models' results in a trajectory
            built from the 'model.pdb.gz' files output by the build_models command.
            options: build_models|refine_implicit_md|refine_explicit_md
            default: most advanced stage for which model files are available
        traj_filepath : str
            default: models/[targetid]/traj-[ensembler_stage].xtc
        topol_filepath : str
            default: models/[targetid]/traj-[ensembler_stage]-topol.pdb
        models_data_filepath : str
            default: models/[targetid]/traj-[ensembler_stage]-data.csv
        process_only_these_templates : list of str
            default: the names of the immediate subdirectories of models/[targetid]
        loglevel :
            Passed unchanged to ensembler.utils.set_loglevel.
        run_main : bool
            If true, the full pipeline is run from the constructor: _gen_df, writing the
            'templateid' and 'seqid' columns of self.df to models_data_filepath, _construct_traj,
            _superpose and _write_traj.

        Raises
        ------
        IndexError
            If process_only_these_templates is empty/None and the models/[targetid] directory
            cannot be listed (e.g. it does not exist).

        Notes
        -----
        Being a constructor, this returns nothing; the resolved settings are stored as the
        attributes models_target_dir, ensembler_stage, traj_filepath, topol_filepath,
        models_data_filepath and templateids.
        """
        ensembler.utils.set_loglevel(loglevel)
        ensembler.core.check_project_toplevel_dir()
        self.models_target_dir = os.path.join(default_project_dirnames.models,
                                              targetid)

        logger.debug('Working on target %s' % targetid)

        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(
                targetid)
        self.ensembler_stage = ensembler_stage

        # Any output path not supplied by the caller defaults to a file inside
        # the target's models directory, named after the stage.
        if traj_filepath is None:
            traj_filepath = os.path.join(
                self.models_target_dir,
                'traj-{0}.xtc'.format(self.ensembler_stage))
        self.traj_filepath = traj_filepath

        if topol_filepath is None:
            topol_filepath = os.path.join(
                self.models_target_dir,
                'traj-{0}-topol.pdb'.format(self.ensembler_stage))
        self.topol_filepath = topol_filepath

        if models_data_filepath is None:
            models_data_filepath = os.path.join(
                self.models_target_dir,
                'traj-{0}-data.csv'.format(self.ensembler_stage))
        self.models_data_filepath = models_data_filepath

        if process_only_these_templates:
            self.templateids = process_only_these_templates
        else:
            # Only the first (top-level) entry of the walk is needed, so take
            # it directly instead of traversing the whole models tree.
            top_level = next(os.walk(self.models_target_dir), None)
            if top_level is None:
                # os.walk yields nothing when the directory cannot be listed.
                # IndexError is kept for backward compatibility with the
                # previous ``directories[0]`` lookup.
                raise IndexError(
                    'Cannot list models directory: {0}'.format(
                        self.models_target_dir))
            self.templateids = top_level[1]

        if run_main:
            self._gen_df()
            self.df.to_csv(self.models_data_filepath,
                           columns=['templateid', 'seqid'])
            self._construct_traj()
            self._superpose()
            self._write_traj()
Beispiel #4
0
    def __init__(self, targetid, ensembler_stage=None, traj_filepath=None, topol_filepath=None,
           models_data_filepath=None, process_only_these_templates=None, loglevel=None,
           run_main=True):
        """Makes a trajectory for a given target, using mdtraj. The trajectory can be used with other
        software, e.g. for visualization with PyMOL or VMD.

        Parameters
        ----------
        targetid : str
            e.g. 'EGFR_HUMAN_D0'
        ensembler_stage : str
            The Ensembler stage from which to build models, e.g. 'build_models' results in a trajectory
            built from the 'model.pdb.gz' files output by the build_models command.
            options: build_models|refine_implicit_md|refine_explicit_md
            default: most advanced stage for which model files are available
        traj_filepath : str
            default: models/[targetid]/traj-[ensembler_stage].xtc
        topol_filepath : str
            default: models/[targetid]/traj-[ensembler_stage]-topol.pdb
        models_data_filepath : str
            default: models/[targetid]/traj-[ensembler_stage]-data.csv
        process_only_these_templates : list of str
            default: names of the subdirectories directly inside models/[targetid]
        loglevel :
            Forwarded as-is to ensembler.utils.set_loglevel.
        run_main : bool
            When true, the constructor runs every step itself: _gen_df, export of the 'templateid'
            and 'seqid' columns of self.df to models_data_filepath, _construct_traj, _superpose
            and _write_traj.

        Raises
        ------
        IndexError
            If no template list is given and models/[targetid] cannot be listed (for example
            because it does not exist).

        Notes
        -----
        Nothing is returned. The resolved values are kept on the instance as models_target_dir,
        ensembler_stage, traj_filepath, topol_filepath, models_data_filepath and templateids.
        """
        ensembler.utils.set_loglevel(loglevel)
        ensembler.core.check_project_toplevel_dir()
        self.models_target_dir = os.path.join(default_project_dirnames.models, targetid)

        logger.debug('Working on target %s' % targetid)

        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(targetid)
        self.ensembler_stage = ensembler_stage

        # Output paths the caller left unset fall back to stage-specific files
        # in the target's models directory.
        if traj_filepath is None:
            traj_filepath = os.path.join(
                self.models_target_dir, 'traj-{0}.xtc'.format(self.ensembler_stage)
            )
        self.traj_filepath = traj_filepath

        if topol_filepath is None:
            topol_filepath = os.path.join(
                self.models_target_dir, 'traj-{0}-topol.pdb'.format(self.ensembler_stage)
            )
        self.topol_filepath = topol_filepath

        if models_data_filepath is None:
            models_data_filepath = os.path.join(
                self.models_target_dir, 'traj-{0}-data.csv'.format(self.ensembler_stage)
            )
        self.models_data_filepath = models_data_filepath

        if process_only_these_templates:
            self.templateids = process_only_these_templates
        else:
            # Just the first entry of the walk is used; fetching it alone avoids
            # descending into every template directory.
            first_entry = next(os.walk(self.models_target_dir), None)
            if first_entry is None:
                # os.walk produces no entries for a directory it cannot list.
                # Raising IndexError keeps the exception type of the previous
                # ``directories[0]`` lookup.
                raise IndexError(
                    'Cannot list models directory: {0}'.format(self.models_target_dir)
                )
            self.templateids = first_entry[1]

        if run_main:
            self._gen_df()
            self.df.to_csv(self.models_data_filepath, columns=['templateid', 'seqid'])
            self._construct_traj()
            self._superpose()
            self._write_traj()