def molprobity_validation(targetid, ensembler_stage=None, loglevel=None):
    """Score every valid model of a target with MolProbity and write the sorted results.

    The model list is determined on MPI rank 0 and broadcast to all ranks.
    Each rank then processes the models at indices
    ``rank, rank + size, rank + 2*size, ...``, the per-rank results are
    gathered on rank 0, sorted by score (ascending) and written to
    ``validation_scores_sorted-molprobity-<ensembler_stage>`` inside the
    target's models directory.

    Parameters
    ----------
    targetid
        Target identifier; also the name of the target's sub-directory under
        ``default_project_dirnames.models``.
    ensembler_stage : optional
        Key into ``model_filenames_by_ensembler_stage``. If None, rank 0
        selects it with ``get_most_advanced_ensembler_modeling_stage`` and
        the choice is broadcast to the other ranks.
    loglevel : optional
        Forwarded to ``set_loglevel``.
    """
    set_loglevel(loglevel)

    valid_model_ids = []
    if mpistate.rank == 0:
        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(targetid)
        valid_model_ids = get_valid_model_ids(ensembler_stage, targetid)

    # Collective calls must be entered by every rank, in the same order.
    # The broadcast used to be guarded by ``if ensembler_stage is None``:
    # rank 0 (which has already resolved the stage) skipped it while the other
    # ranks entered it, so their bcast paired with rank 0's *next* collective.
    # Broadcasting unconditionally keeps all ranks in step; when the caller
    # supplied the stage explicitly it simply re-sends the same value.
    ensembler_stage = mpistate.comm.bcast(ensembler_stage, root=0)
    valid_model_ids = mpistate.comm.bcast(valid_model_ids, root=0)
    nvalid_model_ids = len(valid_model_ids)
    model_structure_filename = model_filenames_by_ensembler_stage[ensembler_stage]

    models_target_dir = os.path.join(default_project_dirnames.models, targetid)
    molprobity_results_filepath = os.path.join(
        models_target_dir,
        'validation_scores_sorted-molprobity-{}'.format(ensembler_stage)
    )

    # Strided split of the model list across ranks.
    molprobity_scores_sublist = []
    for model_index in range(mpistate.rank, nvalid_model_ids, mpistate.size):
        model_id = valid_model_ids[model_index]
        logger.debug('MPI process {} working on model {}'.format(
            mpistate.rank, model_id))
        molprobity_score = run_molprobity_oneline_analysis_and_write_results(
            targetid,
            model_id,
            ensembler_stage,
            model_structure_filename=model_structure_filename,
            models_target_dir=models_target_dir,
        )
        molprobity_scores_sublist.append((model_id, molprobity_score))

    molprobity_scores_gathered_list = mpistate.comm.gather(
        molprobity_scores_sublist, root=0)

    # Only the root receives the gathered data, so only it writes the file.
    if mpistate.rank == 0:
        # Flatten the per-rank lists into one list of (model_id, score) tuples.
        molprobity_scores_list_of_tuples = [
            item
            for sublist in molprobity_scores_gathered_list
            for item in sublist
        ]
        molprobity_scores_sorted = sorted(molprobity_scores_list_of_tuples,
                                          key=lambda x: x[1])
        write_molprobity_scores_list(molprobity_scores_sorted,
                                     molprobity_results_filepath)
def molprobity_validation(targetid, ensembler_stage=None, loglevel=None):
    """Run the MolProbity analysis over a target's valid models, in parallel over MPI.

    Rank 0 resolves the modeling stage (if not given) and the list of valid
    model IDs, and both are broadcast to all ranks. Models are then assigned
    to ranks by striding through the list (``rank``, ``rank + size``, ...).
    The ``(model_id, score)`` pairs are gathered on rank 0, sorted by score in
    ascending order, and written to
    ``validation_scores_sorted-molprobity-<ensembler_stage>`` in the target's
    models directory.

    Parameters
    ----------
    targetid
        Target identifier; joined onto ``default_project_dirnames.models`` to
        locate the target's models directory.
    ensembler_stage : optional
        Key into ``model_filenames_by_ensembler_stage``. When None, the value
        returned by ``get_most_advanced_ensembler_modeling_stage`` on rank 0
        is used on every rank.
    loglevel : optional
        Forwarded to ``set_loglevel``.
    """
    set_loglevel(loglevel)

    valid_model_ids = []
    if mpistate.rank == 0:
        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(targetid)
        valid_model_ids = get_valid_model_ids(ensembler_stage, targetid)

    # Every rank must take part in each collective, in the same order.
    # The previous ``if ensembler_stage is None`` guard made rank 0 skip this
    # broadcast once it had resolved the stage, while the remaining ranks
    # still entered it and ended up paired with rank 0's following bcast.
    # An unconditional broadcast is correct in both cases (stage given or not).
    ensembler_stage = mpistate.comm.bcast(ensembler_stage, root=0)
    valid_model_ids = mpistate.comm.bcast(valid_model_ids, root=0)

    model_structure_filename = model_filenames_by_ensembler_stage[ensembler_stage]
    models_target_dir = os.path.join(default_project_dirnames.models, targetid)
    molprobity_results_filepath = os.path.join(
        models_target_dir, "validation_scores_sorted-molprobity-{}".format(ensembler_stage)
    )

    # This rank's share of the work: every ``size``-th model starting at ``rank``.
    molprobity_scores_sublist = []
    for model_index in range(mpistate.rank, len(valid_model_ids), mpistate.size):
        model_id = valid_model_ids[model_index]
        logger.debug("MPI process {} working on model {}".format(mpistate.rank, model_id))
        molprobity_score = run_molprobity_oneline_analysis_and_write_results(
            targetid,
            model_id,
            ensembler_stage,
            model_structure_filename=model_structure_filename,
            models_target_dir=models_target_dir,
        )
        molprobity_scores_sublist.append((model_id, molprobity_score))

    molprobity_scores_gathered_list = mpistate.comm.gather(molprobity_scores_sublist, root=0)

    # The gathered list is only consumed on the root, which writes the output.
    if mpistate.rank == 0:
        molprobity_scores_list_of_tuples = [
            item for sublist in molprobity_scores_gathered_list for item in sublist
        ]
        molprobity_scores_sorted = sorted(molprobity_scores_list_of_tuples, key=lambda x: x[1])
        write_molprobity_scores_list(molprobity_scores_sorted, molprobity_results_filepath)
def __init__(self, targetid, ensembler_stage=None, traj_filepath=None,
             topol_filepath=None, models_data_filepath=None,
             process_only_these_templates=None, loglevel=None, run_main=True):
    """Makes a trajectory for a given target, using mdtraj.

    The trajectory can be used with other software, e.g. for visualization
    with PyMOL or VMD.

    Parameters
    ----------
    targetid : str
        e.g. 'EGFR_HUMAN_D0'
    ensembler_stage : str
        The Ensembler stage from which to build models, e.g. 'build_models'
        results in a trajectory built from the 'model.pdb.gz' files output by
        the build_models command.
        options: build_models|refine_implicit_md|refine_explicit_md
        default: most advanced stage for which model files are available
    traj_filepath : str
        default: models/[targetid]/traj-[ensembler_stage].xtc
    topol_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-topol.pdb
    models_data_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-data.csv
    process_only_these_templates : list of str
        If given (and non-empty), used as the template IDs; otherwise the
        names of the immediate sub-directories of the target's models
        directory are used.
    loglevel : optional
        Forwarded to ensembler.utils.set_loglevel.
    run_main : bool
        If True (default), run the full pipeline on construction: generate
        the models data, write its 'templateid' and 'seqid' columns to
        models_data_filepath, then construct, superpose and write the
        trajectory.

    Attributes
    ----------
    models_target_dir, ensembler_stage, traj_filepath, topol_filepath,
    models_data_filepath, templateids
        Set directly by this constructor from the arguments/defaults above.
    df
        Models data (e.g. sequence identities); presumably a
        pandas.DataFrame populated by _gen_df -- only available after the
        pipeline has run.

    Raises
    ------
    IndexError
        If no template list was supplied and the target's models directory
        cannot be listed (e.g. it does not exist).

    Notes
    -----
    Earlier documentation listed ``traj`` (mdtraj.Trajectory) and ``df`` as
    return values; a constructor returns nothing, so these are presumably
    exposed as attributes by the helper methods -- TODO confirm for ``traj``.
    """
    ensembler.utils.set_loglevel(loglevel)
    ensembler.core.check_project_toplevel_dir()
    self.models_target_dir = os.path.join(default_project_dirnames.models, targetid)

    logger.debug('Working on target %s', targetid)

    if ensembler_stage is None:
        self.ensembler_stage = get_most_advanced_ensembler_modeling_stage(targetid)
    else:
        self.ensembler_stage = ensembler_stage

    if traj_filepath is None:
        self.traj_filepath = os.path.join(
            self.models_target_dir, 'traj-{0}.xtc'.format(self.ensembler_stage)
        )
    else:
        self.traj_filepath = traj_filepath

    if topol_filepath is None:
        self.topol_filepath = os.path.join(
            self.models_target_dir, 'traj-{0}-topol.pdb'.format(self.ensembler_stage)
        )
    else:
        self.topol_filepath = topol_filepath

    if models_data_filepath is None:
        self.models_data_filepath = os.path.join(
            self.models_target_dir, 'traj-{0}-data.csv'.format(self.ensembler_stage)
        )
    else:
        self.models_data_filepath = models_data_filepath

    if process_only_these_templates:
        self.templateids = process_only_these_templates
    else:
        # Only the top-level entry of the walk is needed (its list of
        # sub-directory names), so take the first item instead of walking
        # and storing the entire models tree.
        top_level = next(os.walk(self.models_target_dir), None)
        if top_level is None:
            # os.walk yields nothing for a missing/unreadable directory.
            # Keep the IndexError type the old list-indexing code raised,
            # but say what actually went wrong.
            raise IndexError(
                'Could not list template directories in {0}'.format(
                    self.models_target_dir)
            )
        self.templateids = top_level[1]

    if run_main:
        self._gen_df()
        self.df.to_csv(self.models_data_filepath, columns=['templateid', 'seqid'])
        self._construct_traj()
        self._superpose()
        self._write_traj()
def __init__(self, targetid, ensembler_stage=None, traj_filepath=None,
             topol_filepath=None, models_data_filepath=None,
             process_only_these_templates=None, loglevel=None, run_main=True):
    """Makes a trajectory for a given target, using mdtraj.

    The trajectory can be used with other software, e.g. for visualization
    with PyMOL or VMD.

    Parameters
    ----------
    targetid : str
        e.g. 'EGFR_HUMAN_D0'
    ensembler_stage : str
        The Ensembler stage from which to build models, e.g. 'build_models'
        results in a trajectory built from the 'model.pdb.gz' files output by
        the build_models command.
        options: build_models|refine_implicit_md|refine_explicit_md
        default: most advanced stage for which model files are available
    traj_filepath : str
        default: models/[targetid]/traj-[ensembler_stage].xtc
    topol_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-topol.pdb
    models_data_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-data.csv
    process_only_these_templates : list of str
        Template IDs to use. When omitted or empty, the immediate
        sub-directory names of the target's models directory are used.
    loglevel : optional
        Passed through to ensembler.utils.set_loglevel.
    run_main : bool
        When True (default) the constructor also runs the pipeline:
        _gen_df, CSV export of the 'templateid' and 'seqid' columns,
        _construct_traj, _superpose and _write_traj, in that order.

    Raises
    ------
    IndexError
        If template IDs have to be discovered on disk but the target's
        models directory cannot be listed (e.g. it does not exist).

    Notes
    -----
    A constructor has no return value. Older documentation described
    ``traj`` (mdtraj.Trajectory) and ``df`` (pandas.DataFrame with models
    data, e.g. sequence identities) as results; ``self.df`` is read here
    after _gen_df runs, and ``traj`` is presumably stored by
    _construct_traj -- TODO confirm.
    """
    ensembler.utils.set_loglevel(loglevel)
    ensembler.core.check_project_toplevel_dir()
    self.models_target_dir = os.path.join(default_project_dirnames.models, targetid)

    logger.debug('Working on target %s', targetid)

    # Resolve the stage first: the default output paths below embed it.
    if ensembler_stage is None:
        ensembler_stage = get_most_advanced_ensembler_modeling_stage(targetid)
    self.ensembler_stage = ensembler_stage

    if traj_filepath is None:
        traj_filepath = os.path.join(
            self.models_target_dir, 'traj-{0}.xtc'.format(self.ensembler_stage)
        )
    self.traj_filepath = traj_filepath

    if topol_filepath is None:
        topol_filepath = os.path.join(
            self.models_target_dir, 'traj-{0}-topol.pdb'.format(self.ensembler_stage)
        )
    self.topol_filepath = topol_filepath

    if models_data_filepath is None:
        models_data_filepath = os.path.join(
            self.models_target_dir, 'traj-{0}-data.csv'.format(self.ensembler_stage)
        )
    self.models_data_filepath = models_data_filepath

    if process_only_these_templates:
        self.templateids = process_only_these_templates
    else:
        # The first item yielded by os.walk describes the top directory;
        # its second element is the list of immediate sub-directory names.
        # Stopping there avoids traversing every model directory on disk.
        first_entry = next(os.walk(self.models_target_dir), None)
        if first_entry is None:
            # Nothing is yielded when the directory is missing/unreadable.
            # The exception type matches what indexing the empty walk
            # result used to raise; only the message is more informative.
            raise IndexError(
                'Could not list template directories in {0}'.format(
                    self.models_target_dir)
            )
        self.templateids = first_entry[1]

    if run_main:
        self._gen_df()
        self.df.to_csv(self.models_data_filepath, columns=['templateid', 'seqid'])
        self._construct_traj()
        self._superpose()
        self._write_traj()