Ejemplo n.º 1
0
def test_command_gather_targets_from_uniprot():
    """Check that the ``gather_targets`` CLI command writes the expected FASTA.

    Dispatches the command with ``--gather_from uniprot`` for the EGFR_HUMAN
    and KC1D_HUMAN mnemonics and the domain regex ``^Protein kinase``, then
    compares the resulting ``targets.fa`` file with a reference string.
    """
    with integrationtest_context(set_up_project_stage='init'):
        ref_fasta = """\
>EGFR_HUMAN_D0
FKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDN
PHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDR
RLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIY
THQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCW
MIDADSRPKFRELIIEFSKMARDPQRYL
>KC1D_HUMAN_D0
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGI
EQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEF
ATYLNFCRSLRFDDKPDYSYLRQLFRNLF
"""

        args = {
            '--gather_from': 'uniprot',
            '--query': 'mnemonic:EGFR_HUMAN OR mnemonic:KC1D_HUMAN',
            '--dbapi_uri': False,
            '--uniprot_domain_regex': '^Protein kinase',
            '--verbose': False,
            '--help': False,
        }
        ensembler.cli_commands.gather_targets.dispatch(args)

        targets_fasta_filepath = os.path.join(
            ensembler.core.default_project_dirnames.targets, 'targets.fa')
        # Read through a context manager so the handle is closed even when the
        # assertion below fails (the bare open().read() left it to the GC).
        with open(targets_fasta_filepath) as targets_fasta_file:
            test_fasta = targets_fasta_file.read()
        assert test_fasta == ref_fasta
Ejemplo n.º 2
0
def test_align_command():
    """Run the ``align`` CLI command and compare its output with the reference project.

    For every target, one ``alignment.pir`` file per template must exist under
    the target's models directory, and ``sequence-identities.txt`` must equal
    the copy found in the installed ``tests/example_project`` resources.
    """
    ref_resources_dirpath = get_installed_resource_filename(os.path.join('tests', 'example_project'))
    target_ids = ['KC1D_HUMAN_D0', 'EGFR_HUMAN_D0']
    template_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context(set_up_project_stage='templates_modeled_loops'):
        ensembler.cli_commands.align.dispatch({
            '--targets': ','.join(target_ids),
            '--targetsfile': False,
            '--templates': ','.join(template_ids),
            '--templatesfile': False,
            '--verbose': False,
        })

        # One alignment file is expected per (target, template) pair.
        for target_id in target_ids:
            target_dirpath = os.path.join(ensembler.core.default_project_dirnames.models, target_id)
            n_alignments = sum(
                filenames.count('alignment.pir')
                for _dirpath, _dirnames, filenames in os.walk(target_dirpath)
            )
            assert n_alignments == len(template_ids)

        for target_id in target_ids:
            # The project-relative path doubles as the path inside the
            # reference resources directory.
            seqid_filepath = os.path.join(
                ensembler.core.default_project_dirnames.models, target_id, 'sequence-identities.txt'
            )
            with open(seqid_filepath) as generated_file:
                generated_text = generated_file.read()
            with open(os.path.join(ref_resources_dirpath, seqid_filepath)) as reference_file:
                reference_text = reference_file.read()
            print(generated_text)
            print(reference_text)
            assert generated_text == reference_text
Ejemplo n.º 3
0
def test_command_gather_targets_from_uniprot():
    """Verify the FASTA file produced by the ``gather_targets`` CLI command.

    The command is dispatched with a UniProt query for the EGFR_HUMAN and
    KC1D_HUMAN mnemonics and the domain regex ``^Protein kinase``; the
    ``targets.fa`` file it writes must match the reference text exactly.
    """
    with integrationtest_context(set_up_project_stage='init'):
        ref_fasta = """\
>EGFR_HUMAN_D0
FKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDN
PHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDR
RLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIY
THQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCW
MIDADSRPKFRELIIEFSKMARDPQRYL
>KC1D_HUMAN_D0
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGI
EQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEF
ATYLNFCRSLRFDDKPDYSYLRQLFRNLF
"""

        args = {
            '--gather_from': 'uniprot',
            '--query': 'mnemonic:EGFR_HUMAN OR mnemonic:KC1D_HUMAN',
            '--dbapi_uri': False,
            '--uniprot_domain_regex': '^Protein kinase',
            '--verbose': False,
            '--help': False,
        }
        ensembler.cli_commands.gather_targets.dispatch(args)

        fasta_filepath = os.path.join(ensembler.core.default_project_dirnames.targets, 'targets.fa')
        # ``with`` guarantees the file is closed; the original
        # ``open(...).read()`` never closed it explicitly.
        with open(fasta_filepath) as fasta_file:
            test_fasta = fasta_file.read()
        assert test_fasta == ref_fasta
Ejemplo n.º 4
0
def test_refine_explicit_md_short():
    """Run a minimal explicit-solvent refinement and check its outputs.

    Refines a single target/template pair with a 2 fs simulation length and
    one step per iteration, then checks that the refined model, energies and
    log files exist, that the log reports a finished and successful run, and
    that the refined model can be loaded with mdtraj.
    """
    with integrationtest_context(set_up_project_stage='solvated'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_explicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0*unit.femtosecond,
            nsteps_per_iteration=1,
            verbose=True
        )
        model_dirpath = os.path.join(default_project_dirnames.models, targetid, templateid)
        explicit_model_filepath = os.path.join(model_dirpath, 'explicit-refined.pdb.gz')
        explicit_energies_filepath = os.path.join(model_dirpath, 'explicit-energies.txt')
        explicit_log_filepath = os.path.join(model_dirpath, 'explicit-log.yaml')

        # Assert per file so a failure names the missing output.
        for output_filepath in (
            explicit_model_filepath, explicit_energies_filepath, explicit_log_filepath
        ):
            assert os.path.exists(output_filepath), '%s does not exist' % output_filepath

        with open(explicit_log_filepath) as explicit_log_file:
            # An explicit Loader is required: yaml.load() without one is
            # deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
            # yaml.Loader keeps the legacy full-loading behaviour; the file
            # was just written by the code under test, so it is trusted.
            explicit_log = yaml.load(explicit_log_file, Loader=yaml.Loader)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True

        # Loading must succeed, i.e. the refined model is readable by mdtraj.
        mdtraj.load_pdb(explicit_model_filepath)
Ejemplo n.º 5
0
def test_refine_explicit_md_short():
    """Exercise ``refine_explicit_md`` with a very short simulation.

    One target/template pair is refined with ``sim_length`` of 2 fs and a
    single step per iteration. The test then requires the refined model,
    energies and log files to exist, the log to report ``finished`` and
    ``successful`` as True, and the refined model to load with mdtraj.
    """
    with integrationtest_context(set_up_project_stage='solvated'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_explicit_md(process_only_these_targets=[targetid],
                           process_only_these_templates=[templateid],
                           sim_length=2.0 * unit.femtosecond,
                           nsteps_per_iteration=1,
                           verbose=True)
        explicit_model_filepath = os.path.join(default_project_dirnames.models,
                                               targetid, templateid,
                                               'explicit-refined.pdb.gz')
        explicit_energies_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid,
            'explicit-energies.txt')
        explicit_log_filepath = os.path.join(default_project_dirnames.models,
                                             targetid, templateid,
                                             'explicit-log.yaml')

        # One assertion per output so the failure message identifies the
        # file that is missing.
        for output_filepath in [
                explicit_model_filepath, explicit_energies_filepath,
                explicit_log_filepath
        ]:
            assert os.path.exists(
                output_filepath), '%s does not exist' % output_filepath

        with open(explicit_log_filepath) as explicit_log_file:
            # yaml.load() needs an explicit Loader (omitting it is deprecated
            # since PyYAML 5.1 and a TypeError in PyYAML 6). yaml.Loader
            # preserves the legacy behaviour; the log is produced by the code
            # under test, so it is trusted input.
            explicit_log = yaml.load(explicit_log_file, Loader=yaml.Loader)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True

        # The call itself is the check: the refined model must be loadable.
        mdtraj.load_pdb(explicit_model_filepath)
Ejemplo n.º 6
0
def test_get_valid_model_filepaths():
    """Every id returned by ``get_valid_model_ids`` must be a known template id.

    Queries the 'refine_implicit_md' stage for target EGFR_HUMAN_D0 inside a
    project set up at the 'refined_implicit' stage.
    """
    known_template_ids = ('KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A')
    with integrationtest_context('refined_implicit'):
        returned_ids = get_valid_model_ids('refine_implicit_md',
                                           'EGFR_HUMAN_D0')
        unexpected_ids = [
            model_id for model_id in returned_ids
            if model_id not in known_template_ids
        ]
        assert not unexpected_ids
Ejemplo n.º 7
0
def test_build_models_command():
    """Dispatch the ``build_models`` CLI command and check the files it leaves.

    With ``--write_modeller_restraints_file`` unset, each requested template
    must end up with a ``model.pdb`` for the target and without a
    ``restraints.rsr.gz``.
    """
    target_id = 'EGFR_HUMAN_D0'
    template_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context(set_up_project_stage='aligned'):
        args = {
            '--targets': target_id,
            '--targetsfile': None,
            '--model_seqid_cutoff': None,
            '--templates': ','.join(template_ids),
            '--templatesfile': None,
            '--write_modeller_restraints_file': None,
            '--verbose': False,
            '--help': False,
        }
        ensembler.cli_commands.build_models.dispatch(args)

        def model_file(template_id, filename):
            # Path of a per-template output file inside the models directory.
            return os.path.join(
                ensembler.core.default_project_dirnames.models, target_id,
                template_id, filename)

        # Same order as before: all models first, then all restraints.
        for template_id in template_ids:
            assert os.path.exists(model_file(template_id, 'model.pdb'))
        for template_id in template_ids:
            assert not os.path.exists(
                model_file(template_id, 'restraints.rsr.gz'))
Ejemplo n.º 8
0
def test_gather_templates_from_pdb():
    """Check the resolved-sequence FASTA written by ``gather_templates_from_pdb``.

    Gathers chains A and D of PDB entry 4KB8 with the domain regex
    ``^Protein kinase`` and compares ``templates-resolved-seq.fa`` in the
    templates directory with a reference string.
    """
    ref_templates_resolved_seq = """\
>KC1D_HUMAN_D0_4KB8_A
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYGTARYASINTHLGIEQSRRDDLESLGYVLMYF
NLGSLPWQGLKERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFR
NLF
>KC1D_HUMAN_D0_4KB8_D
YRLGRKIGDIYLGTDIAAGEEVAIKLECPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYN
VMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGL
GKKGNLVYIIDFGLAKKYRDAQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGY
VLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFD
DKPDYSYLRQLFRNLF
"""
    with integrationtest_context(set_up_project_stage='targets'):
        pdbids = ['4KB8']
        chainids = {'4KB8': ['A', 'D']}
        uniprot_domain_regex = '^Protein kinase'
        ensembler.initproject.gather_templates_from_pdb(pdbids,
                                                        uniprot_domain_regex,
                                                        chainids=chainids)

        resolved_seq_filepath = os.path.join(
            ensembler.core.default_project_dirnames.templates,
            'templates-resolved-seq.fa')
        # Context manager closes the handle; the original open(...).read()
        # inside the assert never closed it.
        with open(resolved_seq_filepath) as resolved_seq_file:
            templates_resolved_seq = resolved_seq_file.read()
        assert templates_resolved_seq == ref_templates_resolved_seq
Ejemplo n.º 9
0
def test_get_structure_files_for_single_pdbchain():
    """Fetching structure files for 1OPL must leave both expected files on disk.

    Checks for ``1OPL.pdb.gz`` in the PDB structures directory and
    ``1OPL.xml.gz`` in the SIFTS structures directory.
    """
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files_for_single_pdbchain('1OPL')

        pdb_filepath = os.path.join(
            ensembler.core.default_project_dirnames.structures_pdb, '1OPL.pdb.gz'
        )
        assert os.path.exists(pdb_filepath)

        sifts_filepath = os.path.join(
            ensembler.core.default_project_dirnames.structures_sifts, '1OPL.xml.gz'
        )
        assert os.path.exists(sifts_filepath)
Ejemplo n.º 10
0
def test_cluster_models_command():
    """Smoke test: the ``cluster`` CLI command runs with every option set to False.

    There are no assertions; the test passes when dispatching does not raise.
    """
    option_names = ['--targetsfile', '--targets', '--cutoff', '--verbose', '--help']
    with integrationtest_context(set_up_project_stage='modeled'):
        # Every option is left unset (False), in the same key order as before.
        args = dict.fromkeys(option_names, False)
        ensembler.cli_commands.cluster.dispatch(args)
Ejemplo n.º 11
0
def test_loopmodel_KC1D_HUMAN_D0_4HNF_A():
    """Loop-model template KC1D_HUMAN_D0_4HNF_A and expect a modeled structure file.

    A mock template carrying only ``id`` and ``seq`` is passed through
    ``pdbfix_template`` and then ``loopmodel_template``.
    """
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        mock_template = Mock()
        mock_template.id = 'KC1D_HUMAN_D0_4HNF_A'
        mock_template.seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'

        # The output of the PDB-fixing step feeds straight into loop modeling.
        loopmodel_template(mock_template, pdbfix_template(mock_template))

        modeled_filepath = os.path.join('templates', 'structures-modeled-loops', 'KC1D_HUMAN_D0_4HNF_A.pdb')
        assert os.path.exists(modeled_filepath)
Ejemplo n.º 12
0
def test_loopmodel_KC1D_HUMAN_D0_4HNF_A():
    """Check that loop modeling writes a structure for template KC1D_HUMAN_D0_4HNF_A.

    The template is a mock exposing ``id`` and ``seq``; it goes through
    ``pdbfix_template`` and ``loopmodel_template``, after which the modeled
    PDB file must exist.
    """
    expected_filepath = os.path.join('templates', 'structures-modeled-loops', 'KC1D_HUMAN_D0_4HNF_A.pdb')
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        fake_template = Mock()
        fake_template.id = 'KC1D_HUMAN_D0_4HNF_A'
        fake_template.seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'

        residues_to_model = pdbfix_template(fake_template)
        loopmodel_template(fake_template, residues_to_model)

        assert os.path.exists(expected_filepath)
Ejemplo n.º 13
0
def test_cluster_models_command():
    """Smoke test for the ``cluster`` CLI command with all options left False.

    Nothing is asserted; the command only has to dispatch without raising.
    """
    with integrationtest_context(set_up_project_stage='modeled'):
        ensembler.cli_commands.cluster.dispatch({
            '--targetsfile': False,
            '--targets': False,
            '--cutoff': False,
            '--verbose': False,
            '--help': False,
        })
Ejemplo n.º 14
0
def test_refine_implicit_md_short():
    """Run a minimal implicit-solvent refinement and check its outputs.

    Refines one target/template pair (2 fs simulation length, one step per
    iteration, one minimization step) and then checks that:

    * the refined model, energies and log files exist;
    * the log reports a finished, successful run with ``ph`` equal to 8.0;
    * the metadata records the custom residue variant ``{49: 'ASH'}`` for
      the target;
    * the residue at index 49 of the refined model has an atom whose string
      form is ``ASP50-HD2``.
    """
    with integrationtest_context(set_up_project_stage='clustered'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_implicit_md(process_only_these_targets=[targetid],
                           process_only_these_templates=[templateid],
                           sim_length=2.0 * unit.femtosecond,
                           nsteps_per_iteration=1,
                           minimization_steps=1,
                           loglevel='debug')
        implicit_metadata_filepath = os.path.join(
            default_project_dirnames.models, targetid,
            'refine_implicit_md-meta0.yaml')
        implicit_model_filepath = os.path.join(default_project_dirnames.models,
                                               targetid, templateid,
                                               'implicit-refined.pdb.gz')
        implicit_energies_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid,
            'implicit-energies.txt')
        implicit_log_filepath = os.path.join(default_project_dirnames.models,
                                             targetid, templateid,
                                             'implicit-log.yaml')

        for output_filepath in (implicit_model_filepath,
                                implicit_energies_filepath,
                                implicit_log_filepath):
            assert os.path.exists(
                output_filepath), "%s does not exist" % output_filepath

        # yaml.load() needs an explicit Loader: omitting it is deprecated
        # since PyYAML 5.1 and raises TypeError in PyYAML 6. yaml.Loader
        # keeps the legacy behaviour; both files are written by the code
        # under test, so they are trusted input.
        with open(implicit_log_filepath) as implicit_log_file:
            implicit_log = yaml.load(implicit_log_file, Loader=yaml.Loader)
        assert implicit_log.get('finished') is True
        assert implicit_log.get('successful') is True
        assert implicit_log.get('ph') == 8.0

        assert os.path.exists(implicit_metadata_filepath)
        with open(implicit_metadata_filepath) as implicit_metadata_file:
            implicit_metadata = yaml.load(implicit_metadata_file,
                                          Loader=yaml.Loader)
        custom_residue_variants = implicit_metadata.get(
            'refine_implicit_md').get('custom_residue_variants')
        assert custom_residue_variants == {'EGFR_HUMAN_D0': {49: 'ASH'}}

        implicit_model_traj = mdtraj.load_pdb(implicit_model_filepath)
        resis = list(implicit_model_traj.top.residues)
        resi49 = resis[49]
        resi49_atom_strings = [str(atom) for atom in resi49.atoms]
        assert 'ASP50-HD2' in resi49_atom_strings
Ejemplo n.º 15
0
def test_get_structure_files_for_single_pdbchain():
    """Check that structure files for PDB entry 1OPL exist after retrieval.

    ``1OPL.pdb.gz`` must be in the PDB structures directory and
    ``1OPL.xml.gz`` in the SIFTS structures directory.
    """
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files_for_single_pdbchain('1OPL')
        dirnames = ensembler.core.default_project_dirnames
        expected_filepaths = [
            os.path.join(dirnames.structures_pdb, '1OPL.pdb.gz'),
            os.path.join(dirnames.structures_sifts, '1OPL.xml.gz'),
        ]
        for expected_filepath in expected_filepaths:
            assert os.path.exists(expected_filepath)
Ejemplo n.º 16
0
def test_get_structure_files_bad_structure_dir():
    """Structure files must still appear when ``structure_dirs`` is ``['BlAh1']``.

    After calling ``get_structure_files`` for 1OPL with that directory name,
    both ``1OPL.pdb.gz`` and ``1OPL.xml.gz`` have to exist in the project's
    structure directories.
    """
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files([{'pdbid': '1OPL'}],
                                                  structure_dirs=['BlAh1'])
        dirnames = ensembler.core.default_project_dirnames
        pdb_filepath = os.path.join(dirnames.structures_pdb, '1OPL.pdb.gz')
        assert os.path.exists(pdb_filepath)
        sifts_filepath = os.path.join(dirnames.structures_sifts,
                                      '1OPL.xml.gz')
        assert os.path.exists(sifts_filepath)
Ejemplo n.º 17
0
def test_get_structure_files_bad_structure_dir():
    """Call ``get_structure_files`` with ``structure_dirs=['BlAh1']`` and check outputs.

    The PDB and SIFTS files for 1OPL are expected in the project's structure
    directories afterwards.
    """
    requested_chains = [{'pdbid': '1OPL'}]
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files(requested_chains, structure_dirs=['BlAh1'])

        structures_pdb_dirpath = ensembler.core.default_project_dirnames.structures_pdb
        assert os.path.exists(os.path.join(structures_pdb_dirpath, '1OPL.pdb.gz'))

        structures_sifts_dirpath = ensembler.core.default_project_dirnames.structures_sifts
        assert os.path.exists(os.path.join(structures_sifts_dirpath, '1OPL.xml.gz'))
Ejemplo n.º 18
0
def test_package_for_fah():
    """Package refined models with ``package_for_fah`` and inspect the result.

    Checks that the packaged project directory for EGFR_HUMAN_D0 contains
    RUN0 and RUN1, the project-level XML files and the per-run files, and
    that the kinetic energy in RUN0's ``state0.xml`` differs from that of a
    reference state by less than the reference value itself.
    """
    with integrationtest_context(set_up_project_stage='refined_explicit'):
        package_for_fah(
            process_only_these_targets=['EGFR_HUMAN_D0'],
            process_only_these_templates=[
                'KC1D_HUMAN_D0_4HNF_A', 'KC1D_HUMAN_D0_4KB8_D'
            ],
        )
        project_dirpath = os.path.join(
            default_project_dirnames.packaged_models, 'fah-projects',
            'EGFR_HUMAN_D0')
        assert os.path.exists(project_dirpath)

        run_dirpaths = [
            os.path.join(project_dirpath, 'RUN{}'.format(run_index))
            for run_index in range(2)
        ]
        for run_dirpath in run_dirpaths:
            assert os.path.exists(run_dirpath)

        # Files shared by the whole packaged project.
        for project_filename in ('system.xml', 'integrator.xml'):
            assert os.path.exists(
                os.path.join(project_dirpath, project_filename))

        # Files expected inside every RUN directory.
        per_run_filenames = (
            'template.txt',
            'system.pdb',
            'protein.pdb',
            'sequence-identity.txt',
            'state0.xml',
        )
        for run_dirpath in run_dirpaths:
            for per_run_filename in per_run_filenames:
                assert os.path.exists(
                    os.path.join(run_dirpath, per_run_filename))

        # test whether kinetic energy in new state file is reasonable
        with open(os.path.join(run_dirpaths[0],
                               'state0.xml')) as packaged_state_file:
            packaged_state = mm.XmlSerializer.deserialize(
                packaged_state_file.read())
        reference_state_filepath = get_installed_resource_filename(
            os.path.join('tests', 'example_project', 'models', 'EGFR_HUMAN_D0',
                         'KC1D_HUMAN_D0_4HNF_A', 'explicit-state.xml.gz'))
        with gzip.open(reference_state_filepath) as reference_state_file:
            reference_state = mm.XmlSerializer.deserialize(
                reference_state_file.read())
        packaged_kinetic_energy = packaged_state.getKineticEnergy()
        reference_kinetic_energy = reference_state.getKineticEnergy()
        energy_difference = abs(packaged_kinetic_energy -
                                reference_kinetic_energy)
        assert energy_difference < reference_kinetic_energy
Ejemplo n.º 19
0
def test_loopmodel_KC1D_HUMAN_D0_3UZP_A():
    """Loop-model a template that has no missing residues.

    After ``pdbfix_template`` and ``loopmodel_template`` have run on the mock
    template, no modeled PDB file may exist for it, and its loopmodel log
    must have ``no_missing_residues`` set to True.
    """
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        template = Mock()
        template.id = 'KC1D_HUMAN_D0_3UZP_A'
        template.seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'

        missing_residues = pdbfix_template(template)
        loopmodel_template(template, missing_residues)

        modeled_loops_dirpath = ensembler.core.default_project_dirnames.templates_structures_modeled_loops
        assert not os.path.exists(os.path.join(modeled_loops_dirpath, 'KC1D_HUMAN_D0_3UZP_A.pdb'))

        log_filepath = os.path.join(modeled_loops_dirpath, 'KC1D_HUMAN_D0_3UZP_A-loopmodel-log.yaml')
        # Open via a context manager so the handle is closed, and pass an
        # explicit Loader: yaml.load() without one is deprecated since
        # PyYAML 5.1 and raises TypeError in PyYAML 6. yaml.Loader keeps the
        # legacy behaviour; the log is written by the code under test.
        with open(log_filepath) as log_file:
            log = yaml.load(log_file, Loader=yaml.Loader)
        assert log['no_missing_residues'] == True
Ejemplo n.º 20
0
def test_loopmodel_KC1D_HUMAN_D0_3UZP_A():
    """Check loop modeling of a template with no missing residues.

    The mock template is run through ``pdbfix_template`` and
    ``loopmodel_template``. No modeled PDB file is expected afterwards, and
    the loopmodel log must record ``no_missing_residues`` as True.
    """
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        template = Mock()
        template.id = 'KC1D_HUMAN_D0_3UZP_A'
        template.seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'

        missing_residues = pdbfix_template(template)
        loopmodel_template(template, missing_residues)

        output_dirpath = ensembler.core.default_project_dirnames.templates_structures_modeled_loops
        modeled_filepath = os.path.join(output_dirpath, 'KC1D_HUMAN_D0_3UZP_A.pdb')
        assert not os.path.exists(modeled_filepath)

        # The original yaml.load(open(...)) never closed the file and passed
        # no Loader (deprecated since PyYAML 5.1, TypeError in PyYAML 6).
        # yaml.Loader keeps the legacy loading behaviour for this trusted,
        # test-generated file.
        with open(os.path.join(output_dirpath, 'KC1D_HUMAN_D0_3UZP_A-loopmodel-log.yaml')) as log_file:
            log = yaml.load(log_file, Loader=yaml.Loader)
        assert log['no_missing_residues'] == True
Ejemplo n.º 21
0
def test_build_models_command():
    """Run the ``build_models`` CLI command for one target and two templates.

    Each template must get a ``model.pdb``. Since
    ``--write_modeller_restraints_file`` is None, no ``restraints.rsr.gz``
    may be present.
    """
    templates = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context(set_up_project_stage='aligned'):
        ensembler.cli_commands.build_models.dispatch({
            '--targets': 'EGFR_HUMAN_D0',
            '--targetsfile': None,
            '--model_seqid_cutoff': None,
            '--templates': ','.join(templates),
            '--templatesfile': None,
            '--write_modeller_restraints_file': None,
            '--verbose': False,
            '--help': False,
        })

        target_dirpath = os.path.join(ensembler.core.default_project_dirnames.models, 'EGFR_HUMAN_D0')
        model_filepaths = [os.path.join(target_dirpath, template, 'model.pdb') for template in templates]
        restraints_filepaths = [os.path.join(target_dirpath, template, 'restraints.rsr.gz') for template in templates]

        for model_filepath in model_filepaths:
            assert os.path.exists(model_filepath)
        for restraints_filepath in restraints_filepaths:
            assert not os.path.exists(restraints_filepath)
Ejemplo n.º 22
0
def test_refine_implicit_md_short():
    """Exercise ``refine_implicit_md`` with a very short simulation.

    One target/template pair is refined (2 fs simulation length, one step per
    iteration, one minimization step). The test then checks the output files,
    the log (``finished``, ``successful``, ``ph`` equal to 8.0), the
    ``custom_residue_variants`` entry in the metadata, and that the residue at
    index 49 of the refined model has an atom rendered as ``ASP50-HD2``.
    """
    with integrationtest_context(set_up_project_stage='clustered'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_implicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0*unit.femtosecond,
            nsteps_per_iteration=1,
            minimization_steps=1,
            loglevel='debug'
        )
        implicit_metadata_filepath = os.path.join(
            default_project_dirnames.models, targetid, 'refine_implicit_md-meta0.yaml'
        )
        implicit_model_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-refined.pdb.gz'
        )
        implicit_energies_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-energies.txt'
        )
        implicit_log_filepath = os.path.join(
            default_project_dirnames.models, targetid, templateid, 'implicit-log.yaml'
        )

        # Assert per file so a failure names the missing output.
        for output_filepath in (
            implicit_model_filepath, implicit_energies_filepath, implicit_log_filepath
        ):
            assert os.path.exists(output_filepath), '%s does not exist' % output_filepath

        # An explicit Loader is required: yaml.load() without one is
        # deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
        # yaml.Loader keeps the legacy behaviour; both files are produced by
        # the code under test, so they are trusted input.
        with open(implicit_log_filepath) as implicit_log_file:
            implicit_log = yaml.load(implicit_log_file, Loader=yaml.Loader)
        assert implicit_log.get('finished') is True
        assert implicit_log.get('successful') is True
        assert implicit_log.get('ph') == 8.0

        assert os.path.exists(implicit_metadata_filepath)
        with open(implicit_metadata_filepath) as implicit_metadata_file:
            implicit_metadata = yaml.load(implicit_metadata_file, Loader=yaml.Loader)
        assert implicit_metadata.get('refine_implicit_md').get('custom_residue_variants') == {
            'EGFR_HUMAN_D0': {49: 'ASH'}
        }

        implicit_model_traj = mdtraj.load_pdb(implicit_model_filepath)
        resis = list(implicit_model_traj.top.residues)
        resi49 = resis[49]
        resi49_atom_strings = [str(atom) for atom in resi49.atoms]
        assert 'ASP50-HD2' in resi49_atom_strings
Ejemplo n.º 23
0
def test_molprobity_validation():
    """Run ``molprobity_validation`` for one target and check the written results.

    Each template's results file must exist and its ``pct_badbonds`` value
    must equal the reference in ``ref_pct_badbonds``. The first two lines of
    the target-level sorted scores file are compared with fixed strings.
    """
    target_id = "EGFR_HUMAN_D0"
    template_ids = ["KC1D_HUMAN_D0_4KB8_D", "KC1D_HUMAN_D0_4HNF_A"]
    with integrationtest_context("refined_explicit"):
        molprobity_validation(target_id)
        for template_id in template_ids:
            results_filepath = os.path.join(
                default_project_dirnames.models, target_id, template_id, "molprobity-refine_explicit_md.yaml"
            )
            assert os.path.exists(results_filepath)
            with open(results_filepath) as results_file:
                # yaml.load() needs an explicit Loader: omitting it is
                # deprecated since PyYAML 5.1 and raises TypeError in
                # PyYAML 6. yaml.Loader keeps the legacy behaviour; the file
                # is written by the code under test, so it is trusted.
                results_dict = yaml.load(results_file, Loader=yaml.Loader)
            assert results_dict.get("pct_badbonds") == ref_pct_badbonds[template_id]

        target_results_filepath = os.path.join(
            default_project_dirnames.models, target_id, "validation_scores_sorted-molprobity-refine_explicit_md"
        )
        with open(target_results_filepath) as target_results_file:
            target_results = target_results_file.read().splitlines()
        assert target_results[0] == "KC1D_HUMAN_D0_4KB8_D 3.828"
        assert target_results[1] == "KC1D_HUMAN_D0_4HNF_A 4.035"
Ejemplo n.º 24
0
def test_gather_templates_from_pdb():
    """Verify ``templates-resolved-seq.fa`` after gathering templates from the PDB.

    ``gather_templates_from_pdb`` is called for chains A and D of entry 4KB8
    with the domain regex ``^Protein kinase``; the FASTA file written to the
    templates directory must match the reference text exactly.
    """
    ref_templates_resolved_seq = """\
>KC1D_HUMAN_D0_4KB8_A
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYGTARYASINTHLGIEQSRRDDLESLGYVLMYF
NLGSLPWQGLKERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFR
NLF
>KC1D_HUMAN_D0_4KB8_D
YRLGRKIGDIYLGTDIAAGEEVAIKLECPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYN
VMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGL
GKKGNLVYIIDFGLAKKYRDAQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGY
VLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFD
DKPDYSYLRQLFRNLF
"""
    with integrationtest_context(set_up_project_stage='targets'):
        pdbids = ['4KB8']
        chainids = {'4KB8': ['A', 'D']}
        uniprot_domain_regex = '^Protein kinase'
        ensembler.initproject.gather_templates_from_pdb(pdbids, uniprot_domain_regex, chainids=chainids)

        fasta_filepath = os.path.join(ensembler.core.default_project_dirnames.templates, 'templates-resolved-seq.fa')
        # ``with`` closes the handle; the original inline open(...).read()
        # left it open.
        with open(fasta_filepath) as fasta_file:
            templates_resolved_seq = fasta_file.read()
        assert templates_resolved_seq == ref_templates_resolved_seq
Ejemplo n.º 25
0
def test_align_command():
    """Dispatch the ``align`` CLI command and validate what it writes.

    Each target directory must contain exactly one ``alignment.pir`` per
    template, and each target's ``sequence-identities.txt`` must match the
    file at the same relative path in the installed ``tests/example_project``
    resources.
    """
    ref_resources_dirpath = get_installed_resource_filename(
        os.path.join('tests', 'example_project'))
    with integrationtest_context(
            set_up_project_stage='templates_modeled_loops'):
        targets = ['KC1D_HUMAN_D0', 'EGFR_HUMAN_D0']
        templates = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
        cli_args = {
            '--targets': ','.join(targets),
            '--targetsfile': False,
            '--templates': ','.join(templates),
            '--templatesfile': False,
            '--verbose': False,
        }
        ensembler.cli_commands.align.dispatch(cli_args)

        for target in targets:
            target_dirpath = os.path.join(
                ensembler.core.default_project_dirnames.models, target)
            alignment_filenames = [
                filename
                for _dirpath, _subdirs, filenames in os.walk(target_dirpath)
                for filename in filenames
                if filename == 'alignment.pir'
            ]
            assert len(alignment_filenames) == len(templates)

        for target in targets:
            seqid_filepath = os.path.join(
                ensembler.core.default_project_dirnames.models, target,
                'sequence-identities.txt')
            # The same relative path is looked up below the reference
            # resources directory.
            ref_seqid_filepath = os.path.join(ref_resources_dirpath,
                                              seqid_filepath)
            with open(seqid_filepath) as produced_file:
                produced_text = produced_file.read()
            with open(ref_seqid_filepath) as expected_file:
                expected_text = expected_file.read()
            print(produced_text)
            print(expected_text)
            assert produced_text == expected_text
Ejemplo n.º 26
0
def test_molprobity_validation():
    """Check the files written by ``molprobity_validation`` for EGFR_HUMAN_D0.

    For each template, the results YAML must exist and report the
    ``pct_badbonds`` value stored in ``ref_pct_badbonds``. The first two
    lines of the sorted validation scores file must match fixed strings.
    """
    target_id = 'EGFR_HUMAN_D0'
    template_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context('refined_explicit'):
        molprobity_validation(target_id)
        for template_id in template_ids:
            results_filepath = os.path.join(
                default_project_dirnames.models, target_id, template_id,
                'molprobity-refine_explicit_md.yaml')
            assert os.path.exists(results_filepath)
            with open(results_filepath) as results_file:
                # An explicit Loader is mandatory in PyYAML 6 (TypeError
                # otherwise) and omitting it is deprecated since 5.1.
                # yaml.Loader keeps the legacy behaviour for this trusted,
                # test-generated file.
                results_dict = yaml.load(results_file, Loader=yaml.Loader)
            assert results_dict.get(
                'pct_badbonds') == ref_pct_badbonds[template_id]

        target_results_filepath = os.path.join(
            default_project_dirnames.models, target_id,
            'validation_scores_sorted-molprobity-refine_explicit_md')
        with open(target_results_filepath) as target_results_file:
            target_results = target_results_file.read().splitlines()
        assert target_results[0] == 'KC1D_HUMAN_D0_4KB8_D 3.828'
        assert target_results[1] == 'KC1D_HUMAN_D0_4HNF_A 4.035'
Ejemplo n.º 27
0
def test_cluster_models():
    """Smoke test: ``cluster_models`` runs on a project set up at the 'modeled' stage.

    There are no assertions; the test passes when the call does not raise.
    """
    project_context = integrationtest_context(set_up_project_stage='modeled')
    with project_context:
        ensembler.modeling.cluster_models()
Ejemplo n.º 28
0
def test_cluster_models():
    """Call ``ensembler.modeling.cluster_models`` inside a 'modeled'-stage project.

    Nothing is asserted; the call only has to complete without raising.
    """
    stage = 'modeled'
    with integrationtest_context(set_up_project_stage=stage):
        ensembler.modeling.cluster_models()
Ejemplo n.º 29
0
def test_package_for_fah():
    """Run ``package_for_fah`` and verify the layout of the packaged project.

    The project directory for EGFR_HUMAN_D0 must contain RUN0 and RUN1, the
    project-level XML files and the expected files in every RUN directory.
    The kinetic energy read from RUN0's ``state0.xml`` must differ from that
    of a reference state by less than the reference value.
    """
    target_id = 'EGFR_HUMAN_D0'
    template_ids = ['KC1D_HUMAN_D0_4HNF_A', 'KC1D_HUMAN_D0_4KB8_D']
    with integrationtest_context(set_up_project_stage='refined_explicit'):
        package_for_fah(
            process_only_these_targets=[target_id],
            process_only_these_templates=template_ids
        )
        base_dirpath = os.path.join(
            default_project_dirnames.packaged_models, 'fah-projects', target_id
        )
        assert os.path.exists(base_dirpath)
        for run_dirname in ('RUN0', 'RUN1'):
            assert os.path.exists(os.path.join(base_dirpath, run_dirname))

        # Files that live directly in the packaged project directory.
        for project_filename in ['system.xml', 'integrator.xml']:
            assert os.path.exists(os.path.join(base_dirpath, project_filename))

        # Files that every RUN directory has to provide.
        per_run_filenames = [
            'template.txt',
            'system.pdb',
            'protein.pdb',
            'sequence-identity.txt',
            'state0.xml',
        ]
        for run_id in range(2):
            run_dirpath = os.path.join(base_dirpath, 'RUN{}'.format(run_id))
            for per_run_filename in per_run_filenames:
                assert os.path.exists(os.path.join(run_dirpath, per_run_filename))

        # test whether kinetic energy in new state file is reasonable
        new_state_filepath = os.path.join(base_dirpath, 'RUN0', 'state0.xml')
        with open(new_state_filepath) as new_state_file:
            new_state = mm.XmlSerializer.deserialize(new_state_file.read())
        ref_state_relpath = os.path.join(
            'tests', 'example_project', 'models',
            'EGFR_HUMAN_D0', 'KC1D_HUMAN_D0_4HNF_A', 'explicit-state.xml.gz'
        )
        with gzip.open(get_installed_resource_filename(ref_state_relpath)) as ref_state_file:
            ref_state = mm.XmlSerializer.deserialize(ref_state_file.read())
        new_kinetic_energy = new_state.getKineticEnergy()
        ref_kinetic_energy = ref_state.getKineticEnergy()
        assert abs(new_kinetic_energy - ref_kinetic_energy) < ref_kinetic_energy
Ejemplo n.º 30
0
def test_mktraj_implicit_start():
    """Smoke test: construct ``MkTrajImplicitStart`` for EGFR_HUMAN_D0 at debug log level.

    No assertions are made; construction only has to complete without raising.
    """
    constructor_kwargs = {"targetid": "EGFR_HUMAN_D0", "loglevel": "debug"}
    with integrationtest_context(set_up_project_stage="refined_explicit"):
        MkTrajImplicitStart(**constructor_kwargs)
Ejemplo n.º 31
0
def test_loopmodel_logs():
    """Smoke test: build ``LoopmodelLogs`` and call ``add_missing_resis_data``.

    Runs in a project set up at the 'templates_modeled_loops' stage; nothing
    is asserted beyond the calls not raising.
    """
    with integrationtest_context(set_up_project_stage='templates_modeled_loops'):
        LoopmodelLogs().add_missing_resis_data()
Ejemplo n.º 32
0
def test_get_valid_model_filepaths():
    """Check that ``get_valid_model_ids`` only yields the two known template ids.

    The lookup is for stage 'refine_implicit_md' and target EGFR_HUMAN_D0,
    inside a project set up at the 'refined_implicit' stage.
    """
    targetid = 'EGFR_HUMAN_D0'
    allowed_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context('refined_implicit'):
        model_ids = get_valid_model_ids('refine_implicit_md', targetid)
        assert all(model_id in allowed_ids for model_id in model_ids)
Ejemplo n.º 33
0
def test_mktraj_implicit_start():
    """Instantiate ``MkTrajImplicitStart`` in a 'refined_explicit'-stage project.

    Nothing is asserted; the constructor only has to run without raising.
    """
    project_context = integrationtest_context(set_up_project_stage='refined_explicit')
    with project_context:
        MkTrajImplicitStart(
            targetid='EGFR_HUMAN_D0',
            loglevel='debug',
        )
Ejemplo n.º 34
0
def test_loopmodel_logs():
    """Create ``LoopmodelLogs`` and add its missing-residue data.

    Runs at the 'templates_modeled_loops' project stage. There are no
    assertions, so the test only verifies that both calls complete.
    """
    stage = 'templates_modeled_loops'
    with integrationtest_context(set_up_project_stage=stage):
        logs = LoopmodelLogs()
        logs.add_missing_resis_data()