コード例 #1
0
ファイル: validation.py プロジェクト: sonyahanson/ensembler
def molprobity_validation_multiple_targets(targetids=None, modeling_stage=None, loglevel=None):
    """
    Calculate model quality using MolProbity ``oneline-analysis`` command.

    Calls ``molprobity_validation`` once for each requested target. For each
    target, a text file named
    ``models/[targetid]/validation_scores_sorted-[method]-[ensembler_stage]``
    is written, which contains the target's model ids sorted by validation
    score. This can be used by the subsequent ``package_models`` command to
    filter out models below a specified quality threshold.

    Typically, this should be run after models have been refined to the
    desired extent (e.g. after implicit or explicit MD refinement).

    More detailed validation results are written to the individual model
    directories.

    MPI-enabled.

    Parameters
    ----------
    targetids: list of str or str
        Target id(s) to validate. A single id may be given as a bare string.
        Default: None (every target returned by ``get_targets()``)
    modeling_stage: str
        {None|build_models|refine_implicit_md|refine_explicit_md}
        Default: None (automatically selects most advanced stage)
    loglevel:
        Passed to ``set_loglevel`` and forwarded to ``molprobity_validation``.
    """
    set_loglevel(loglevel)
    if targetids is None:
        targetids = [target.id for target in get_targets()]
    elif isinstance(targetids, str):
        # isinstance (rather than an exact type comparison) also accepts str
        # subclasses, which would otherwise be iterated character by character.
        targetids = [targetids]
    for targetid in targetids:
        logger.info("Working on target {}".format(targetid))
        molprobity_validation(targetid=targetid, ensembler_stage=modeling_stage, loglevel=loglevel)
コード例 #2
0
def molprobity_validation_multiple_targets(targetids=None,
                                           modeling_stage=None,
                                           loglevel=None):
    """
    Calculate model quality using MolProbity ``oneline-analysis`` command.

    This is a thin driver: it normalizes ``targetids`` to a list and runs
    ``molprobity_validation`` on each entry in turn.

    For each target, a text file named
    ``models/[targetid]/validation_scores_sorted-[method]-[ensembler_stage]``
    is written, containing that target's model ids sorted by validation
    score. This can be used by the subsequent ``package_models`` command to
    filter out models below a specified quality threshold.

    Typically, this should be run after models have been refined to the
    desired extent (e.g. after implicit or explicit MD refinement).

    More detailed validation results are written to the individual model
    directories.

    MPI-enabled.

    Parameters
    ----------
    targetids: list of str or str
        One target id, or a list of them.
        Default: None (all targets returned by ``get_targets()``)
    modeling_stage: str
        {None|build_models|refine_implicit_md|refine_explicit_md}
        Default: None (automatically selects most advanced stage)
    loglevel:
        Handed to ``set_loglevel`` here and passed through to each
        ``molprobity_validation`` call.
    """
    set_loglevel(loglevel)

    if targetids is None:
        targetids = [target.id for target in get_targets()]
    elif isinstance(targetids, str):
        # Wrap a lone id so the loop below does not iterate over its
        # characters; isinstance also covers str subclasses.
        targetids = [targetids]

    for targetid in targetids:
        logger.info('Working on target {}'.format(targetid))
        molprobity_validation(targetid=targetid,
                              ensembler_stage=modeling_stage,
                              loglevel=loglevel)
コード例 #3
0
def test_pdbfix_templates():
    """
    Smoke test: run ``pdbfix_templates`` on two KC1D_HUMAN template structures.

    The gzipped PDB resources shipped with the package are unpacked into the
    resolved-templates directory inside a temporary working directory, and
    mock template objects (``id`` and ``seq`` attributes only) are passed to
    ``pdbfix_templates``. No assertions are made on the result; the test
    only checks that the call completes without raising.
    """
    set_loglevel('debug')

    resolved_dir = ensembler.core.default_project_dirnames.templates_structures_resolved
    modeled_loops_dir = ensembler.core.default_project_dirnames.templates_structures_modeled_loops

    template_seqs = [
        (
            'KC1D_HUMAN_D0_4KB8_D',
            'LRVGNRYRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLFHRQGFSYDYVFDWNMLKFGASRAADDAERERRDREERLRH',
        ),
        (
            'KC1D_HUMAN_D0_3UYS_D',
            'MELRVGNRYRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLFHRQGFSYDYVFDWNMLK',
        ),
    ]

    # Resource paths are resolved before entering the temporary directory,
    # as in the original test.
    pdb_gz_filepaths = [
        get_installed_resource_filename(
            os.path.join('tests', 'resources', template_id + '.pdb.gz'))
        for template_id, _ in template_seqs
    ]

    with ensembler.utils.enter_temp_dir():
        ensembler.utils.create_dir(resolved_dir)
        ensembler.utils.create_dir(modeled_loops_dir)

        templates = []
        for (template_id, template_seq), pdb_gz_filepath in zip(template_seqs, pdb_gz_filepaths):
            with gzip.open(pdb_gz_filepath) as pdb_gz_file:
                contents = pdb_gz_file.read()
            # gzip yields bytes on Python 3; the output file is opened in
            # text mode, so decode first.
            if isinstance(contents, bytes):
                contents = contents.decode('utf-8')
            pdb_filepath = os.path.join(resolved_dir, template_id + '.pdb')
            with open(pdb_filepath, 'w') as pdb_file:
                pdb_file.write(contents)

            template = Mock()
            template.id = template_id
            template.seq = template_seq
            templates.append(template)

        missing_residues_list = pdbfix_templates(templates)
コード例 #4
0
def molprobity_validation(targetid, ensembler_stage=None, loglevel=None):
    """
    Score every valid model of one target with MolProbity and write the
    sorted scores to a file.

    Rank 0 determines the modeling stage (if not given) and the list of valid
    model ids, and broadcasts both. Models are then distributed round-robin
    across MPI ranks (rank ``r`` handles indices ``r, r + size, ...``). The
    per-rank ``(model_id, score)`` lists are gathered on rank 0, sorted by
    ascending score and written to
    ``<models dir>/<targetid>/validation_scores_sorted-molprobity-<stage>``.

    Parameters
    ----------
    targetid: str
        Target whose models are validated.
    ensembler_stage: str
        Key into ``model_filenames_by_ensembler_stage``.
        Default: None (rank 0 calls
        ``get_most_advanced_ensembler_modeling_stage(targetid)``)
    loglevel:
        Passed to ``set_loglevel``.
    """
    set_loglevel(loglevel)
    valid_model_ids = []
    if mpistate.rank == 0:
        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(
                targetid)
        valid_model_ids = get_valid_model_ids(ensembler_stage, targetid)

    # Broadcasts are collective: every rank must take part in each one, in the
    # same order. The stage used to be broadcast only where it was still None,
    # which excluded rank 0 (it had just resolved the stage) and left the
    # other ranks' bcast paired with the wrong payload. Broadcasting
    # unconditionally is harmless when the caller supplied the stage, since
    # all ranks then already hold the same value.
    ensembler_stage = mpistate.comm.bcast(ensembler_stage, root=0)
    valid_model_ids = mpistate.comm.bcast(valid_model_ids, root=0)
    nvalid_model_ids = len(valid_model_ids)
    model_structure_filename = model_filenames_by_ensembler_stage[
        ensembler_stage]

    models_target_dir = os.path.join(default_project_dirnames.models, targetid)
    molprobity_results_filepath = os.path.join(
        models_target_dir,
        'validation_scores_sorted-molprobity-{}'.format(ensembler_stage))

    molprobity_scores_sublist = []
    # Round-robin split of the model list across ranks
    for model_index in range(mpistate.rank, nvalid_model_ids, mpistate.size):
        model_id = valid_model_ids[model_index]

        logger.debug('MPI process {} working on model {}'.format(
            mpistate.rank, model_id))

        molprobity_score = run_molprobity_oneline_analysis_and_write_results(
            targetid,
            model_id,
            ensembler_stage,
            model_structure_filename=model_structure_filename,
            models_target_dir=models_target_dir,
        )

        molprobity_scores_sublist.append((model_id, molprobity_score))

    molprobity_scores_gathered_list = mpistate.comm.gather(
        molprobity_scores_sublist, root=0)
    if mpistate.rank == 0:
        # gather returns one sublist per rank; flatten before sorting
        molprobity_scores_list_of_tuples = [
            item for sublist in molprobity_scores_gathered_list
            for item in sublist
        ]
        molprobity_scores_sorted = sorted(molprobity_scores_list_of_tuples,
                                          key=lambda x: x[1])
        write_molprobity_scores_list(molprobity_scores_sorted,
                                     molprobity_results_filepath)
コード例 #5
0
 def __init__(self, targetid, project_dir='.', log_level=None):
     """
     Set up the instance for one target and immediately run all
     processing steps.

     Checks the project top-level directory, sets the log level, verifies
     that the target's models directory exists, then calls the private
     step methods in a fixed order.

     Parameters
     ----------
     targetid: str
         Target id; its first two underscore-separated fields are stored
         as ``uniprot_mnemonic``.
     project_dir: str
         Stored as ``self.project_dir``.
     log_level:
         Passed to ``set_loglevel``.

     Raises
     ------
     Exception
         If the target's models directory does not exist.
     """
     check_project_toplevel_dir()
     set_loglevel(log_level)

     self.targetid = targetid
     target_models_path = os.path.join(default_project_dirnames.models, self.targetid)
     self.models_target_dir = target_models_path
     if not os.path.exists(target_models_path):
         raise Exception('Model "{}" not found'.format(self.targetid))

     self.project_dir = project_dir
     name_fields = self.targetid.split('_')
     self.uniprot_mnemonic = '_'.join(name_fields[:2])

     # Steps run strictly in this order; each is looked up just before it
     # is called.
     step_names = (
         '_get_models',
         '_get_model_seqs',
         '_get_uniprot_seq',
         '_find_seq_starts_and_ends',
         '_renumber_models',
         '_output_models',
         '_finish',
     )
     for step_name in step_names:
         getattr(self, step_name)()
コード例 #6
0
 def __init__(self, targetid, project_dir='.', log_level=None):
     """
     Initialize for ``targetid`` and run the whole processing sequence.

     After checking the project directory and setting the log level, the
     constructor records the target's models directory (raising if it is
     missing), derives ``uniprot_mnemonic`` from the first two
     underscore-separated fields of ``targetid``, and then invokes the
     private step methods one after another.

     Parameters
     ----------
     targetid: str
     project_dir: str
         Stored on the instance as ``project_dir``.
     log_level:
         Forwarded to ``set_loglevel``.

     Raises
     ------
     Exception
         When the models directory for ``targetid`` does not exist.
     """
     check_project_toplevel_dir()
     set_loglevel(log_level)

     self.targetid = targetid
     models_root = default_project_dirnames.models
     self.models_target_dir = os.path.join(models_root, self.targetid)
     directory_present = os.path.exists(self.models_target_dir)
     if not directory_present:
         not_found_message = 'Model "{}" not found'.format(self.targetid)
         raise Exception(not_found_message)

     self.project_dir = project_dir
     # Only the first two fields are needed, so limit the split accordingly
     leading_fields = self.targetid.split('_', 2)[:2]
     self.uniprot_mnemonic = '_'.join(leading_fields)

     # Processing steps, in order
     self._get_models()
     self._get_model_seqs()
     self._get_uniprot_seq()
     self._find_seq_starts_and_ends()
     self._renumber_models()
     self._output_models()
     self._finish()
コード例 #7
0
ファイル: validation.py プロジェクト: sonyahanson/ensembler
def molprobity_validation(targetid, ensembler_stage=None, loglevel=None):
    """
    Run MolProbity on all valid models of ``targetid`` and write a
    score-sorted list.

    MPI-enabled: rank 0 resolves the modeling stage and the valid model ids
    and broadcasts them; each rank then scores every ``size``-th model
    starting at its own rank index; rank 0 gathers the ``(model_id, score)``
    tuples, sorts them by ascending score and writes them to
    ``validation_scores_sorted-molprobity-<stage>`` in the target's models
    directory.

    Parameters
    ----------
    targetid: str
    ensembler_stage: str
        Must be a key of ``model_filenames_by_ensembler_stage``.
        Default: None (chosen on rank 0 by
        ``get_most_advanced_ensembler_modeling_stage``)
    loglevel:
        Passed to ``set_loglevel``.
    """
    set_loglevel(loglevel)
    valid_model_ids = []
    if mpistate.rank == 0:
        if ensembler_stage is None:
            ensembler_stage = get_most_advanced_ensembler_modeling_stage(targetid)
        valid_model_ids = get_valid_model_ids(ensembler_stage, targetid)

    # bcast is a collective operation, so all ranks must call it. The earlier
    # ``if ensembler_stage is None`` guard let rank 0 skip the call (it had
    # already filled in the stage) while the other ranks entered it, putting
    # the collectives out of step. Always broadcast; when the stage was passed
    # in explicitly this just re-sends a value every rank already has.
    ensembler_stage = mpistate.comm.bcast(ensembler_stage, root=0)
    valid_model_ids = mpistate.comm.bcast(valid_model_ids, root=0)
    nvalid_model_ids = len(valid_model_ids)
    model_structure_filename = model_filenames_by_ensembler_stage[ensembler_stage]

    models_target_dir = os.path.join(default_project_dirnames.models, targetid)
    molprobity_results_filepath = os.path.join(
        models_target_dir, "validation_scores_sorted-molprobity-{}".format(ensembler_stage)
    )

    molprobity_scores_sublist = []
    # Each rank takes indices rank, rank + size, rank + 2 * size, ...
    for model_index in range(mpistate.rank, nvalid_model_ids, mpistate.size):
        model_id = valid_model_ids[model_index]

        logger.debug("MPI process {} working on model {}".format(mpistate.rank, model_id))

        molprobity_score = run_molprobity_oneline_analysis_and_write_results(
            targetid,
            model_id,
            ensembler_stage,
            model_structure_filename=model_structure_filename,
            models_target_dir=models_target_dir,
        )

        molprobity_scores_sublist.append((model_id, molprobity_score))

    molprobity_scores_gathered_list = mpistate.comm.gather(molprobity_scores_sublist, root=0)
    if mpistate.rank == 0:
        # One sublist per rank comes back from gather; merge them into one list
        molprobity_scores_list_of_tuples = [item for sublist in molprobity_scores_gathered_list for item in sublist]
        molprobity_scores_sorted = sorted(molprobity_scores_list_of_tuples, key=lambda x: x[1])
        write_molprobity_scores_list(molprobity_scores_sorted, molprobity_results_filepath)
コード例 #8
0
def test_pdbfix_ABL1_HUMAN_D0_2E2B_B():
    """
    Check the missing residues reported by ``pdbfix_template`` for the
    ABL1_HUMAN_D0_2E2B_B template.

    The gzipped PDB resource is unpacked into the resolved-templates
    directory inside a temporary working directory, and a mock template
    (``id`` and ``seq`` attributes only) is passed to ``pdbfix_template``.
    The returned mapping must contain exactly the two expected gaps and
    must not contain an entry keyed ``(0, 271)``.
    """
    set_loglevel('debug')
    template_pdb_gz_filepath = get_installed_resource_filename(os.path.join('tests', 'resources', 'ABL1_HUMAN_D0_2E2B_B.pdb.gz'))
    template_pdb_filepath = os.path.join(ensembler.core.default_project_dirnames.templates_structures_resolved, 'ABL1_HUMAN_D0_2E2B_B.pdb')
    with ensembler.utils.enter_temp_dir():
        ensembler.utils.create_dir(ensembler.core.default_project_dirnames.templates_structures_resolved)
        ensembler.utils.create_dir(ensembler.core.default_project_dirnames.templates_structures_modeled_loops)

        # Read and close the input before the output file is opened
        with gzip.open(template_pdb_gz_filepath) as template_pdb_gz_file:
            contents = template_pdb_gz_file.read()
        # gzip yields bytes on Python 3; the output file is in text mode
        if isinstance(contents, bytes):
            contents = contents.decode('utf-8')
        with open(template_pdb_filepath, 'w') as template_pdb_file:
            template_pdb_file.write(contents)

        template = Mock()
        template.id = 'ABL1_HUMAN_D0_2E2B_B'
        template.seq = 'ITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEAAVMKEIKHPNLVQLLGVCTREPPFYIITEFMTYGNLLDYLRECNRQEVNAVVLLYMATQISSAMEYLEKKNFIHRDLAARNCLVGENHLVKVADFGLSRLMTGDTYTAHAGAKFPIKWTAPESLAYNKFSIKSDVWAFGVLLWEIATYGMSPYPGIDLSQVYELLEKDYRMERPEGCPEKVYELMRACWQWNPSDRPSFAEIHQAFETMFQESSISDEVEKELGKQ'

        missing_residues = pdbfix_template(template)

        assert (0, 271) not in missing_residues
        assert missing_residues == {
            (0, 32): ['LYS', 'GLU', 'ASP', 'THR', 'MET'],
            (0, 139): ['ARG', 'LEU', 'MET', 'THR', 'GLY', 'ASP'],
        }
コード例 #9
0
def package_for_fah(process_only_these_targets=None,
                    process_only_these_templates=None,
                    model_seqid_cutoff=None,
                    model_validation_score_cutoff=None,
                    model_validation_score_percentile=None,
                    nclones=1,
                    archive=False,
                    openmm_platform='Reference',
                    temperature=300.0 * unit.kelvin,
                    collision_rate=1.0 / unit.picosecond,
                    timestep=2.0 * unit.femtoseconds,
                    loglevel=None):
    """
    Create the input files and directory structure necessary to start a Folding@Home project.

    For every target with a models directory, rank 0 selects and sorts the
    valid templates, creates the target project directory and sets up the
    system and integrator files from the first sorted template. The
    templates are then distributed round-robin across MPI ranks, and one RUN
    is generated per template.

    MPI-enabled.

    Parameters
    ----------
    process_only_these_targets : container of str
        If given and non-empty, targets whose id is not in it are skipped.
    process_only_these_templates, model_seqid_cutoff,
    model_validation_score_cutoff, model_validation_score_percentile
        Forwarded unchanged to ``get_valid_templates_for_target``.
    nclones : int
        Forwarded to ``generate_fah_run``.
    archive : Bool
        A .tgz compressed archive will be created for each individual RUN directory.
    openmm_platform : str
        Forwarded to ``generate_fah_run``.
    temperature, collision_rate, timestep
        Forwarded to ``setup_system_and_integrator_files`` and
        ``generate_fah_run``.
    loglevel
        Passed to ``set_loglevel``.
    """
    set_loglevel(loglevel)

    if mpistate.rank == 0:
        if not os.path.exists(fah_projects_dir):
            os.mkdir(fah_projects_dir)
    mpistate.comm.Barrier()

    targets, templates_resolved_seq = get_targets_and_templates()

    for target in targets:
        if process_only_these_targets and (target.id
                                           not in process_only_these_targets):
            continue

        target_project_dir = os.path.join(fah_projects_dir, target.id)

        models_target_dir = os.path.join(default_project_dirnames.models,
                                         target.id)
        if not os.path.exists(models_target_dir):
            continue

        mpistate.comm.Barrier()

        # Placeholders so that every rank has something to pass to bcast
        sorted_valid_templates = []
        system = None
        renumbered_resnums = {}

        if mpistate.rank == 0:
            logger.info(
                '-------------------------------------------------------------------------'
            )
            logger.info('Building FAH OpenMM project for target {}'.format(
                target.id))
            logger.info(
                '-------------------------------------------------------------------------'
            )

            valid_templates = get_valid_templates_for_target(
                target,
                templates_resolved_seq,
                process_only_these_templates=process_only_these_templates,
                model_seqid_cutoff=model_seqid_cutoff,
                model_validation_score_cutoff=model_validation_score_cutoff,
                model_validation_score_percentile=
                model_validation_score_percentile)

            sorted_valid_templates = sort_valid_templates_by_seqid(
                target, valid_templates)

            if len(sorted_valid_templates) > 0:
                create_target_project_dir(target)

                system = setup_system_and_integrator_files(
                    target, sorted_valid_templates[0], temperature,
                    collision_rate, timestep)

                renumbered_resnums = get_renumbered_topol_resnums(target)
            else:
                # Indexing [0] on an empty list would raise on rank 0 only,
                # leaving the other ranks blocked in the broadcasts below.
                # Skip the setup instead; the RUN loop then has no work.
                logger.warning(
                    'No valid templates found for target {}; skipping'.format(
                        target.id))

        sorted_valid_templates = mpistate.comm.bcast(sorted_valid_templates,
                                                     root=0)
        system = mpistate.comm.bcast(system, root=0)
        renumbered_resnums = mpistate.comm.bcast(renumbered_resnums, root=0)

        logger.debug("Building RUNs in parallel...")

        # Round-robin: this rank builds RUNs rank, rank + size, ...
        for run_index in range(mpistate.rank, len(sorted_valid_templates),
                               mpistate.size):
            template = sorted_valid_templates[run_index]

            logger.info(
                '-------------------------------------------------------------------------'
            )
            logger.info('Building RUN{} for template {}'.format(
                run_index, template))
            logger.info(
                '-------------------------------------------------------------------------'
            )

            source_dir = os.path.join(models_target_dir, template)
            generate_fah_run(
                target_project_dir,
                template,
                source_dir,
                system,
                run_index,
                nclones,
                temperature,
                collision_rate,
                timestep,
                openmm_platform,
                renumbered_resnums,
            )

            if archive:
                tgz_fah_run(target, run_index)

    mpistate.comm.Barrier()
    if mpistate.rank == 0:
        logger.info('Done.')
コード例 #10
0
ファイル: packaging.py プロジェクト: choderalab/ensembler
def package_for_fah(process_only_these_targets=None,
                    process_only_these_templates=None,
                    model_seqid_cutoff=None,
                    model_validation_score_cutoff=None,
                    model_validation_score_percentile=None,
                    nclones=1, archive=False,
                    openmm_platform='Reference',
                    temperature=300.0 * unit.kelvin,
                    collision_rate=1.0 / unit.picosecond,
                    timestep=2.0 * unit.femtoseconds,
                    loglevel=None):
    """
    Create the input files and directory structure necessary to start a Folding@Home project.

    Targets without a models directory, or excluded by
    ``process_only_these_targets``, are skipped. For each remaining target,
    rank 0 picks and sorts the valid templates, creates the target project
    directory and builds the system and integrator files from the first
    sorted template. All ranks then share the templates round-robin and
    generate one RUN per template.

    MPI-enabled.

    Parameters
    ----------
    process_only_these_targets : container of str
        If given and non-empty, only targets whose id is in it are processed.
    process_only_these_templates, model_seqid_cutoff,
    model_validation_score_cutoff, model_validation_score_percentile
        Passed through to ``get_valid_templates_for_target``.
    nclones : int
        Passed through to ``generate_fah_run``.
    archive : Bool
        A .tgz compressed archive will be created for each individual RUN directory.
    openmm_platform : str
        Passed through to ``generate_fah_run``.
    temperature, collision_rate, timestep
        Passed through to ``setup_system_and_integrator_files`` and
        ``generate_fah_run``.
    loglevel
        Passed to ``set_loglevel``.
    """
    set_loglevel(loglevel)

    if mpistate.rank == 0:
        if not os.path.exists(fah_projects_dir):
            os.mkdir(fah_projects_dir)
    mpistate.comm.Barrier()

    targets, templates_resolved_seq = get_targets_and_templates()

    for target in targets:
        if process_only_these_targets and (target.id not in process_only_these_targets):
            continue

        target_project_dir = os.path.join(fah_projects_dir, target.id)

        models_target_dir = os.path.join(default_project_dirnames.models, target.id)
        if not os.path.exists(models_target_dir):
            continue

        mpistate.comm.Barrier()

        # Defaults that the non-root ranks hand to bcast
        sorted_valid_templates = []
        system = None
        renumbered_resnums = {}

        if mpistate.rank == 0:
            logger.info('-------------------------------------------------------------------------')
            logger.info('Building FAH OpenMM project for target {}'.format(target.id))
            logger.info('-------------------------------------------------------------------------')

            valid_templates = get_valid_templates_for_target(
                target,
                templates_resolved_seq,
                process_only_these_templates=process_only_these_templates,
                model_seqid_cutoff=model_seqid_cutoff,
                model_validation_score_cutoff=model_validation_score_cutoff,
                model_validation_score_percentile=model_validation_score_percentile
            )

            sorted_valid_templates = sort_valid_templates_by_seqid(
                target,
                valid_templates
            )

            # With no valid templates, ``sorted_valid_templates[0]`` would
            # raise on rank 0 while the other ranks wait in the broadcasts
            # below. Warn and leave the defaults in place so all ranks move
            # on to the next target together.
            if len(sorted_valid_templates) == 0:
                logger.warning(
                    'No valid templates found for target {}; skipping'.format(target.id)
                )
            else:
                create_target_project_dir(target)

                system = setup_system_and_integrator_files(
                    target,
                    sorted_valid_templates[0],
                    temperature,
                    collision_rate,
                    timestep
                )

                renumbered_resnums = get_renumbered_topol_resnums(target)

        sorted_valid_templates = mpistate.comm.bcast(sorted_valid_templates, root=0)
        system = mpistate.comm.bcast(system, root=0)
        renumbered_resnums = mpistate.comm.bcast(renumbered_resnums, root=0)

        logger.debug("Building RUNs in parallel...")

        # Each rank handles run indices rank, rank + size, rank + 2 * size, ...
        for run_index in range(mpistate.rank, len(sorted_valid_templates), mpistate.size):
            template = sorted_valid_templates[run_index]

            logger.info('-------------------------------------------------------------------------')
            logger.info(
                'Building RUN{} for template {}'.format(
                    run_index, template
                )
            )
            logger.info('-------------------------------------------------------------------------')

            source_dir = os.path.join(models_target_dir, template)
            generate_fah_run(
                target_project_dir,
                template,
                source_dir,
                system,
                run_index,
                nclones,
                temperature,
                collision_rate,
                timestep,
                openmm_platform,
                renumbered_resnums,
            )

            if archive:
                tgz_fah_run(target, run_index)

    mpistate.comm.Barrier()
    if mpistate.rank == 0:
        logger.info('Done.')