def test_command_gather_targets_from_uniprot():
    """Dispatch the ``gather_targets`` CLI command and compare the FASTA output.

    The command is dispatched with ``--gather_from uniprot``, a query for
    the EGFR_HUMAN and KC1D_HUMAN mnemonics and the domain regex
    '^Protein kinase'. The ``targets.fa`` file in the project's targets
    directory must then equal the reference text below.
    """
    # Reference FASTA; the sequence lines must stay at column 0 because the
    # literal is compared verbatim with the generated file.
    ref_fasta = """\
>EGFR_HUMAN_D0
FKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDN
PHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDR
RLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIY
THQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCW
MIDADSRPKFRELIIEFSKMARDPQRYL
>KC1D_HUMAN_D0
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGI
EQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEF
ATYLNFCRSLRFDDKPDYSYLRQLFRNLF
"""
    args = {
        '--gather_from': 'uniprot',
        '--query': 'mnemonic:EGFR_HUMAN OR mnemonic:KC1D_HUMAN',
        '--dbapi_uri': False,
        '--uniprot_domain_regex': '^Protein kinase',
        '--verbose': False,
        '--help': False,
    }
    with integrationtest_context(set_up_project_stage='init'):
        ensembler.cli_commands.gather_targets.dispatch(args)
        targets_fasta_filepath = os.path.join(
            ensembler.core.default_project_dirnames.targets, 'targets.fa'
        )
        # Read through a context manager so the handle is closed promptly.
        with open(targets_fasta_filepath) as targets_fasta_file:
            test_fasta = targets_fasta_file.read()
        assert test_fasta == ref_fasta
def test_align_command():
    """Dispatch the ``align`` CLI command and check what it writes.

    For every target, the number of ``alignment.pir`` files found under the
    target's models directory must equal the number of templates, and the
    target's ``sequence-identities.txt`` must match the copy stored under
    the installed ``tests/example_project`` resources.
    """
    reference_root = get_installed_resource_filename(os.path.join('tests', 'example_project'))
    target_ids = ['KC1D_HUMAN_D0', 'EGFR_HUMAN_D0']
    template_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context(set_up_project_stage='templates_modeled_loops'):
        ensembler.cli_commands.align.dispatch({
            '--targets': ','.join(target_ids),
            '--targetsfile': False,
            '--templates': ','.join(template_ids),
            '--templatesfile': False,
            '--verbose': False,
        })

        # One alignment file is expected per template, anywhere below the
        # target's models directory.
        for target_id in target_ids:
            target_dirpath = os.path.join(
                ensembler.core.default_project_dirnames.models, target_id
            )
            alignment_count = sum(
                filenames.count('alignment.pir')
                for _dirpath, _dirnames, filenames in os.walk(target_dirpath)
            )
            assert alignment_count == len(template_ids)

        # The sequence identity tables must match the reference project.
        for target_id in target_ids:
            produced_path = os.path.join(
                ensembler.core.default_project_dirnames.models,
                target_id,
                'sequence-identities.txt',
            )
            expected_path = os.path.join(reference_root, produced_path)
            with open(produced_path) as produced_file:
                produced_text = produced_file.read()
            with open(expected_path) as expected_file:
                expected_text = expected_file.read()
            # Print both texts so they are visible in the captured output.
            print(produced_text)
            print(expected_text)
            assert produced_text == expected_text
def test_command_gather_targets_from_uniprot():
    """Check the ``targets.fa`` written by the ``gather_targets`` CLI command.

    Dispatches the command with ``--gather_from uniprot``, querying the
    EGFR_HUMAN and KC1D_HUMAN mnemonics with the domain regex
    '^Protein kinase', and compares the resulting file with the reference
    FASTA text.
    """
    # The literal is compared verbatim with the generated file, so its
    # content lines are deliberately not indented.
    ref_fasta = """\
>EGFR_HUMAN_D0
FKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDN
PHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDR
RLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIY
THQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCW
MIDADSRPKFRELIIEFSKMARDPQRYL
>KC1D_HUMAN_D0
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGI
EQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEF
ATYLNFCRSLRFDDKPDYSYLRQLFRNLF
"""
    with integrationtest_context(set_up_project_stage='init'):
        args = {
            '--gather_from': 'uniprot',
            '--query': 'mnemonic:EGFR_HUMAN OR mnemonic:KC1D_HUMAN',
            '--dbapi_uri': False,
            '--uniprot_domain_regex': '^Protein kinase',
            '--verbose': False,
            '--help': False,
        }
        ensembler.cli_commands.gather_targets.dispatch(args)

        fasta_path = os.path.join(
            ensembler.core.default_project_dirnames.targets, 'targets.fa'
        )
        # Use a context manager so the file handle is not left open.
        with open(fasta_path) as fasta_file:
            test_fasta = fasta_file.read()
        assert test_fasta == ref_fasta
def test_refine_explicit_md_short():
    """Run a very short explicit-solvent refinement and check its outputs.

    ``refine_explicit_md`` is called for a single target/template pair with
    a simulation length of 2 fs and one step per iteration. The refined
    model, energies and log files must exist, the log must report
    ``finished`` and ``successful`` as True, and the refined model is passed
    to ``mdtraj.load_pdb``.
    """
    targetid = 'EGFR_HUMAN_D0'
    templateid = 'KC1D_HUMAN_D0_4KB8_D'
    with integrationtest_context(set_up_project_stage='solvated'):
        refine_explicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0 * unit.femtosecond,
            nsteps_per_iteration=1,
            verbose=True,
        )

        model_dirpath = os.path.join(default_project_dirnames.models, targetid, templateid)
        explicit_model_filepath = os.path.join(model_dirpath, 'explicit-refined.pdb.gz')
        explicit_energies_filepath = os.path.join(model_dirpath, 'explicit-energies.txt')
        explicit_log_filepath = os.path.join(model_dirpath, 'explicit-log.yaml')

        # Check each file separately so a failure names the missing path.
        for output_filepath in (
            explicit_model_filepath,
            explicit_energies_filepath,
            explicit_log_filepath,
        ):
            assert os.path.exists(output_filepath), "%s does not exist" % output_filepath

        with open(explicit_log_filepath) as explicit_log_file:
            # NOTE(review): yaml.load is called without an explicit Loader;
            # recent PyYAML releases require one - confirm the supported version.
            explicit_log = yaml.load(explicit_log_file)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True

        # The trajectory itself is not inspected; presumably the call is kept
        # as a check that the refined model can be parsed.
        mdtraj.load_pdb(explicit_model_filepath)
def test_refine_explicit_md_short():
    """Check the files written by a minimal ``refine_explicit_md`` run.

    Refines one target/template pair for 2 fs with one step per iteration,
    then asserts that the refined model, the energies file and the log file
    exist, that the log has ``finished`` and ``successful`` set to True, and
    finally hands the refined model to ``mdtraj.load_pdb``.
    """
    with integrationtest_context(set_up_project_stage='solvated'):
        targetid = 'EGFR_HUMAN_D0'
        templateid = 'KC1D_HUMAN_D0_4KB8_D'
        refine_explicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0 * unit.femtosecond,
            nsteps_per_iteration=1,
            verbose=True,
        )

        def model_path(filename):
            # All checked outputs live in the target/template model directory.
            return os.path.join(
                default_project_dirnames.models, targetid, templateid, filename
            )

        explicit_model_filepath = model_path('explicit-refined.pdb.gz')
        explicit_energies_filepath = model_path('explicit-energies.txt')
        explicit_log_filepath = model_path('explicit-log.yaml')

        # One assertion per file, so the failure message names the culprit.
        assert os.path.exists(explicit_model_filepath), \
            "%s does not exist" % explicit_model_filepath
        assert os.path.exists(explicit_energies_filepath), \
            "%s does not exist" % explicit_energies_filepath
        assert os.path.exists(explicit_log_filepath), \
            "%s does not exist" % explicit_log_filepath

        with open(explicit_log_filepath) as explicit_log_file:
            # NOTE(review): yaml.load without a Loader argument is rejected by
            # recent PyYAML releases - confirm which version is targeted.
            explicit_log = yaml.load(explicit_log_file)
        assert explicit_log.get('finished') is True
        assert explicit_log.get('successful') is True

        # Result intentionally discarded; the call presumably only verifies
        # that the refined model is loadable.
        mdtraj.load_pdb(explicit_model_filepath)
def test_get_valid_model_filepaths():
    """Every ID from ``get_valid_model_ids`` must be an expected template ID.

    The lookup is done for the 'refine_implicit_md' stage of target
    EGFR_HUMAN_D0 inside a 'refined_implicit' integration-test context.
    """
    expected_template_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    target = 'EGFR_HUMAN_D0'
    with integrationtest_context('refined_implicit'):
        model_ids = get_valid_model_ids('refine_implicit_md', target)
        unexpected_ids = [
            model_id for model_id in model_ids
            if model_id not in expected_template_ids
        ]
        assert not unexpected_ids
def test_build_models_command():
    """Dispatch the ``build_models`` CLI command and check the model files.

    A ``model.pdb`` must exist for both templates of target EGFR_HUMAN_D0,
    and no ``restraints.rsr.gz`` may exist for either of them
    (``--write_modeller_restraints_file`` is passed as None).
    """
    target_id = 'EGFR_HUMAN_D0'
    template_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context(set_up_project_stage='aligned'):
        cli_args = {
            '--targets': target_id,
            '--targetsfile': None,
            '--model_seqid_cutoff': None,
            '--templates': ','.join(template_ids),
            '--templatesfile': None,
            '--write_modeller_restraints_file': None,
            '--verbose': False,
            '--help': False,
        }
        ensembler.cli_commands.build_models.dispatch(cli_args)

        # Models first, for both templates ...
        for template_id in template_ids:
            model_filepath = os.path.join(
                ensembler.core.default_project_dirnames.models,
                target_id, template_id, 'model.pdb',
            )
            assert os.path.exists(model_filepath)

        # ... then the restraints files, which must be absent.
        for template_id in template_ids:
            restraints_filepath = os.path.join(
                ensembler.core.default_project_dirnames.models,
                target_id, template_id, 'restraints.rsr.gz',
            )
            assert not os.path.exists(restraints_filepath)
def test_gather_templates_from_pdb():
    """Gather templates for PDB entry 4KB8 and compare the resolved sequences.

    Calls ``ensembler.initproject.gather_templates_from_pdb`` for chains A
    and D of 4KB8 with the domain regex '^Protein kinase', then checks that
    ``templates-resolved-seq.fa`` in the project's templates directory
    equals the reference FASTA text.
    """
    # Compared verbatim with the generated file, hence no indentation on
    # the content lines.
    ref_templates_resolved_seq = """\
>KC1D_HUMAN_D0_4KB8_A
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYGTARYASINTHLGIEQSRRDDLESLGYVLMYF
NLGSLPWQGLKERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFR
NLF
>KC1D_HUMAN_D0_4KB8_D
YRLGRKIGDIYLGTDIAAGEEVAIKLECPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYN
VMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGL
GKKGNLVYIIDFGLAKKYRDAQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGY
VLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFD
DKPDYSYLRQLFRNLF
"""
    with integrationtest_context(set_up_project_stage='targets'):
        pdbids = ['4KB8']
        chainids = {'4KB8': ['A', 'D']}
        uniprot_domain_regex = '^Protein kinase'
        ensembler.initproject.gather_templates_from_pdb(
            pdbids, uniprot_domain_regex, chainids=chainids
        )

        resolved_seq_filepath = os.path.join(
            ensembler.core.default_project_dirnames.templates,
            'templates-resolved-seq.fa',
        )
        # Read via a context manager so the file handle is closed.
        with open(resolved_seq_filepath) as resolved_seq_file:
            templates_resolved_seq = resolved_seq_file.read()
        assert templates_resolved_seq == ref_templates_resolved_seq
def test_get_structure_files_for_single_pdbchain():
    """Fetch structure files for PDB ID 1OPL and check that both were written.

    Expects ``1OPL.pdb.gz`` in the project's PDB structures directory and
    ``1OPL.xml.gz`` in its SIFTS structures directory.
    """
    pdbid = '1OPL'
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files_for_single_pdbchain(pdbid)

        dirnames = ensembler.core.default_project_dirnames
        pdb_filepath = os.path.join(dirnames.structures_pdb, '1OPL.pdb.gz')
        sifts_filepath = os.path.join(dirnames.structures_sifts, '1OPL.xml.gz')

        assert os.path.exists(pdb_filepath)
        assert os.path.exists(sifts_filepath)
def test_cluster_models_command():
    """Dispatch the ``cluster`` CLI command with every option set to False.

    The test makes no assertions; it passes if the dispatch does not raise.
    """
    option_names = ('--targetsfile', '--targets', '--cutoff', '--verbose', '--help')
    with integrationtest_context(set_up_project_stage='modeled'):
        cli_args = {option_name: False for option_name in option_names}
        ensembler.cli_commands.cluster.dispatch(cli_args)
def test_loopmodel_KC1D_HUMAN_D0_4HNF_A():
    """Run PDB fixing and loop modelling for template KC1D_HUMAN_D0_4HNF_A.

    A ``Mock`` stands in for the template object and carries only ``id``
    and ``seq``. After ``pdbfix_template`` and ``loopmodel_template`` have
    run, the loop-modelled PDB file for the template must exist.
    """
    template_id = 'KC1D_HUMAN_D0_4HNF_A'
    template_seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        template = Mock()
        template.id = template_id
        template.seq = template_seq

        # The residues reported by the fixing step feed the loop modelling.
        missing_residues = pdbfix_template(template)
        loopmodel_template(template, missing_residues)

        modeled_filepath = os.path.join(
            'templates', 'structures-modeled-loops', 'KC1D_HUMAN_D0_4HNF_A.pdb'
        )
        assert os.path.exists(modeled_filepath)
def test_loopmodel_KC1D_HUMAN_D0_4HNF_A():
    """Loop-model template KC1D_HUMAN_D0_4HNF_A and expect an output PDB.

    The template is a ``Mock`` with ``id`` and ``seq`` attributes. It is
    passed through ``pdbfix_template`` and then ``loopmodel_template``.
    """
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        mock_template = Mock()
        mock_template.id = 'KC1D_HUMAN_D0_4HNF_A'
        mock_template.seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'

        residues_to_model = pdbfix_template(mock_template)
        loopmodel_template(mock_template, residues_to_model)

        # The output location is spelled out literally here.
        expected_output = os.path.join(
            'templates', 'structures-modeled-loops', 'KC1D_HUMAN_D0_4HNF_A.pdb'
        )
        assert os.path.exists(expected_output)
def test_cluster_models_command():
    """Smoke-test the ``cluster`` CLI dispatch in a 'modeled' project.

    All five options are passed as False and nothing is asserted afterwards.
    """
    cli_args = dict.fromkeys(
        ['--targetsfile', '--targets', '--cutoff', '--verbose', '--help'],
        False,
    )
    with integrationtest_context(set_up_project_stage='modeled'):
        ensembler.cli_commands.cluster.dispatch(cli_args)
def test_refine_implicit_md_short():
    """Run a very short implicit-solvent refinement and check its outputs.

    ``refine_implicit_md`` is called for one target/template pair with a
    2 fs simulation, one step per iteration and one minimization step.
    The test then checks the output files, the log contents (including
    ``ph == 8.0``), the ``custom_residue_variants`` metadata entry, and
    that residue index 49 of the refined model has an 'ASP50-HD2' atom.

    An earlier variant, left commented out in the original, used target
    MYB_HUMAN_D0 with template MYB_MOUSE_D0_1GUU and expected the variant
    ``{'MYB_HUMAN_D0': {47: 'ASH'}}``.
    """
    targetid = 'EGFR_HUMAN_D0'
    templateid = 'KC1D_HUMAN_D0_4KB8_D'
    with integrationtest_context(set_up_project_stage='clustered'):
        refine_implicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0 * unit.femtosecond,
            nsteps_per_iteration=1,
            minimization_steps=1,
            loglevel='debug',
        )

        implicit_metadata_filepath = os.path.join(
            default_project_dirnames.models, targetid, 'refine_implicit_md-meta0.yaml'
        )
        model_dirpath = os.path.join(default_project_dirnames.models, targetid, templateid)
        implicit_model_filepath = os.path.join(model_dirpath, 'implicit-refined.pdb.gz')
        implicit_energies_filepath = os.path.join(model_dirpath, 'implicit-energies.txt')
        implicit_log_filepath = os.path.join(model_dirpath, 'implicit-log.yaml')

        # Same order and messages as three separate assertions.
        for required_filepath in (
            implicit_model_filepath,
            implicit_energies_filepath,
            implicit_log_filepath,
        ):
            assert os.path.exists(required_filepath), "%s does not exist" % required_filepath

        with open(implicit_log_filepath) as implicit_log_file:
            # NOTE(review): yaml.load is used without an explicit Loader.
            implicit_log = yaml.load(implicit_log_file)
        assert implicit_log.get('finished') is True
        assert implicit_log.get('successful') is True
        assert implicit_log.get('ph') == 8.0

        assert os.path.exists(implicit_metadata_filepath)
        with open(implicit_metadata_filepath) as implicit_metadata_file:
            implicit_metadata = yaml.load(implicit_metadata_file)
        refine_metadata = implicit_metadata.get('refine_implicit_md')
        expected_variants = {'EGFR_HUMAN_D0': {49: 'ASH'}}
        assert refine_metadata.get('custom_residue_variants') == expected_variants

        # Index 49 is the residue named in the custom variant above.
        implicit_model_traj = mdtraj.load_pdb(implicit_model_filepath)
        residues = list(implicit_model_traj.top.residues)
        atom_names = [str(atom) for atom in residues[49].atoms]
        assert 'ASP50-HD2' in atom_names
def test_get_structure_files_for_single_pdbchain():
    """Check that fetching structure files for 1OPL writes both archives.

    After ``get_structure_files_for_single_pdbchain('1OPL')`` the PDB
    archive and the SIFTS XML archive must exist in their project dirs.
    """
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files_for_single_pdbchain('1OPL')

        expected_filepaths = [
            os.path.join(
                ensembler.core.default_project_dirnames.structures_pdb,
                '1OPL.pdb.gz',
            ),
            os.path.join(
                ensembler.core.default_project_dirnames.structures_sifts,
                '1OPL.xml.gz',
            ),
        ]
        for expected_filepath in expected_filepaths:
            assert os.path.exists(expected_filepath)
def test_get_structure_files_bad_structure_dir():
    """Call ``get_structure_files`` with the structure dir name 'BlAh1'.

    Even with that value in ``structure_dirs``, both ``1OPL.pdb.gz`` and
    ``1OPL.xml.gz`` are expected in the project's structure directories.
    """
    requested_chains = [{'pdbid': '1OPL'}]
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files(
            requested_chains, structure_dirs=['BlAh1']
        )

        dirnames = ensembler.core.default_project_dirnames
        pdb_filepath = os.path.join(dirnames.structures_pdb, '1OPL.pdb.gz')
        sifts_filepath = os.path.join(dirnames.structures_sifts, '1OPL.xml.gz')

        assert os.path.exists(pdb_filepath)
        assert os.path.exists(sifts_filepath)
def test_get_structure_files_bad_structure_dir():
    """Structure files for 1OPL must appear despite ``structure_dirs=['BlAh1']``.

    Checks for the PDB archive and the SIFTS XML archive after
    ``ensembler.initproject.get_structure_files`` has run.
    """
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.get_structure_files(
            [{'pdbid': '1OPL'}], structure_dirs=['BlAh1']
        )

        expected_filepaths = (
            os.path.join(
                ensembler.core.default_project_dirnames.structures_pdb,
                '1OPL.pdb.gz',
            ),
            os.path.join(
                ensembler.core.default_project_dirnames.structures_sifts,
                '1OPL.xml.gz',
            ),
        )
        for expected_filepath in expected_filepaths:
            assert os.path.exists(expected_filepath)
def test_package_for_fah():
    """Package refined models with ``package_for_fah`` and check the layout.

    Verifies the project directory, the RUN0 and RUN1 directories, the
    project-level and per-run files, and that the kinetic energy in
    ``RUN0/state0.xml`` differs from the reference state's by less than
    the reference value itself.
    """
    with integrationtest_context(set_up_project_stage='refined_explicit'):
        package_for_fah(
            process_only_these_targets=['EGFR_HUMAN_D0'],
            process_only_these_templates=['KC1D_HUMAN_D0_4HNF_A', 'KC1D_HUMAN_D0_4KB8_D'],
        )

        project_root = os.path.join(
            default_project_dirnames.packaged_models, 'fah-projects', 'EGFR_HUMAN_D0'
        )
        assert os.path.exists(project_root)
        for run_dirname in ('RUN0', 'RUN1'):
            assert os.path.exists(os.path.join(project_root, run_dirname))

        # Files shared by the whole project.
        for project_filename in ('system.xml', 'integrator.xml'):
            assert os.path.exists(os.path.join(project_root, project_filename))

        # Files expected inside each of the two run directories.
        per_run_filenames = (
            'template.txt',
            'system.pdb',
            'protein.pdb',
            'sequence-identity.txt',
            'state0.xml',
        )
        for run_index in range(2):
            run_dirpath = os.path.join(project_root, 'RUN{}'.format(run_index))
            for per_run_filename in per_run_filenames:
                assert os.path.exists(os.path.join(run_dirpath, per_run_filename))

        # test whether kinetic energy in new state file is reasonable
        packaged_state_path = os.path.join(project_root, 'RUN0', 'state0.xml')
        with open(packaged_state_path) as packaged_state_file:
            packaged_state = mm.XmlSerializer.deserialize(packaged_state_file.read())

        reference_state_path = get_installed_resource_filename(
            os.path.join(
                'tests', 'example_project', 'models',
                'EGFR_HUMAN_D0', 'KC1D_HUMAN_D0_4HNF_A', 'explicit-state.xml.gz',
            )
        )
        with gzip.open(reference_state_path) as reference_state_file:
            reference_state = mm.XmlSerializer.deserialize(reference_state_file.read())

        packaged_kinetic_energy = packaged_state.getKineticEnergy()
        reference_kinetic_energy = reference_state.getKineticEnergy()
        energy_difference = abs(packaged_kinetic_energy - reference_kinetic_energy)
        assert energy_difference < reference_kinetic_energy
def test_loopmodel_KC1D_HUMAN_D0_3UZP_A():
    """
    No missing residues

    Runs ``pdbfix_template`` and ``loopmodel_template`` on a ``Mock``
    template for KC1D_HUMAN_D0_3UZP_A. No loop-modelled PDB file may be
    written, and the loopmodel log must have ``no_missing_residues`` set.
    """
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        template = Mock()
        template.id = 'KC1D_HUMAN_D0_3UZP_A'
        template.seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'

        missing_residues = pdbfix_template(template)
        loopmodel_template(template, missing_residues)

        modeled_loops_dirpath = (
            ensembler.core.default_project_dirnames.templates_structures_modeled_loops
        )
        assert not os.path.exists(
            os.path.join(modeled_loops_dirpath, 'KC1D_HUMAN_D0_3UZP_A.pdb')
        )

        log_filepath = os.path.join(
            modeled_loops_dirpath, 'KC1D_HUMAN_D0_3UZP_A-loopmodel-log.yaml'
        )
        # Open the log in a context manager so the handle is closed.
        with open(log_filepath) as log_file:
            # NOTE(review): yaml.load is called without an explicit Loader;
            # recent PyYAML releases require one - confirm the supported version.
            log = yaml.load(log_file)
        assert log['no_missing_residues'] == True
def test_loopmodel_KC1D_HUMAN_D0_3UZP_A():
    """
    No missing residues

    The template (a ``Mock`` with ``id`` and ``seq``) goes through
    ``pdbfix_template`` and ``loopmodel_template``. The test expects no
    loop-modelled PDB file, and a log with ``no_missing_residues`` set.
    """
    template_id = 'KC1D_HUMAN_D0_3UZP_A'
    with integrationtest_context(set_up_project_stage='templates_resolved'):
        template = Mock()
        template.id = template_id
        template.seq = 'YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGLGKKGNLVYIIDFGLAKKYRDARTHQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGYVLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFRNLF'

        missing_residues = pdbfix_template(template)
        loopmodel_template(template, missing_residues)

        output_dirpath = (
            ensembler.core.default_project_dirnames.templates_structures_modeled_loops
        )
        modeled_filepath = os.path.join(output_dirpath, 'KC1D_HUMAN_D0_3UZP_A.pdb')
        assert not os.path.exists(modeled_filepath)

        log_filepath = os.path.join(
            output_dirpath, 'KC1D_HUMAN_D0_3UZP_A-loopmodel-log.yaml'
        )
        # A context manager guarantees the log file handle is released.
        with open(log_filepath) as log_file:
            # NOTE(review): yaml.load without a Loader argument is rejected by
            # recent PyYAML releases - confirm which version is targeted.
            log = yaml.load(log_file)
        assert log['no_missing_residues'] == True
def test_build_models_command():
    """Check the files produced by the ``build_models`` CLI command.

    For target EGFR_HUMAN_D0 and its two templates, ``model.pdb`` must
    exist and ``restraints.rsr.gz`` must not.
    """
    with integrationtest_context(set_up_project_stage='aligned'):
        requested_templates = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
        ensembler.cli_commands.build_models.dispatch({
            '--targets': 'EGFR_HUMAN_D0',
            '--targetsfile': None,
            '--model_seqid_cutoff': None,
            '--templates': ','.join(requested_templates),
            '--templatesfile': None,
            '--write_modeller_restraints_file': None,
            '--verbose': False,
            '--help': False,
        })

        def output_path(template_id, filename):
            # Location of a build output for the given template.
            return os.path.join(
                ensembler.core.default_project_dirnames.models,
                'EGFR_HUMAN_D0', template_id, filename,
            )

        # Kept in the original order: both models, then both restraints.
        assert os.path.exists(output_path('KC1D_HUMAN_D0_4KB8_D', 'model.pdb'))
        assert os.path.exists(output_path('KC1D_HUMAN_D0_4HNF_A', 'model.pdb'))
        assert not os.path.exists(output_path('KC1D_HUMAN_D0_4KB8_D', 'restraints.rsr.gz'))
        assert not os.path.exists(output_path('KC1D_HUMAN_D0_4HNF_A', 'restraints.rsr.gz'))
def test_refine_implicit_md_short():
    """Run a very short implicit-solvent refinement and check its outputs.

    Calls ``refine_implicit_md`` for one target/template pair (2 fs, one
    step per iteration, one minimization step). It then checks that the
    model, energies and log files exist, that the log reports ``finished``,
    ``successful`` and ``ph == 8.0``, that the metadata file records the
    expected ``custom_residue_variants``, and that residue index 49 of the
    refined model contains an 'ASP50-HD2' atom.
    """
    targetid = 'EGFR_HUMAN_D0'
    templateid = 'KC1D_HUMAN_D0_4KB8_D'
    with integrationtest_context(set_up_project_stage='clustered'):
        refine_implicit_md(
            process_only_these_targets=[targetid],
            process_only_these_templates=[templateid],
            sim_length=2.0 * unit.femtosecond,
            nsteps_per_iteration=1,
            minimization_steps=1,
            loglevel='debug',
        )

        implicit_metadata_filepath = os.path.join(
            default_project_dirnames.models, targetid, 'refine_implicit_md-meta0.yaml'
        )
        model_dirpath = os.path.join(default_project_dirnames.models, targetid, templateid)
        implicit_model_filepath = os.path.join(model_dirpath, 'implicit-refined.pdb.gz')
        implicit_energies_filepath = os.path.join(model_dirpath, 'implicit-energies.txt')
        implicit_log_filepath = os.path.join(model_dirpath, 'implicit-log.yaml')

        # Assert per file so that a failure reports which output is missing.
        for output_filepath in (
            implicit_model_filepath,
            implicit_energies_filepath,
            implicit_log_filepath,
        ):
            assert os.path.exists(output_filepath), "%s does not exist" % output_filepath

        with open(implicit_log_filepath) as implicit_log_file:
            # NOTE(review): yaml.load is called without an explicit Loader;
            # recent PyYAML releases require one - confirm the supported version.
            implicit_log = yaml.load(implicit_log_file)
        assert implicit_log.get('finished') is True
        assert implicit_log.get('successful') is True
        assert implicit_log.get('ph') == 8.0

        assert os.path.exists(implicit_metadata_filepath)
        with open(implicit_metadata_filepath) as implicit_metadata_file:
            implicit_metadata = yaml.load(implicit_metadata_file)
        assert implicit_metadata.get('refine_implicit_md').get('custom_residue_variants') == {
            'EGFR_HUMAN_D0': {49: 'ASH'}
        }

        # Index 49 is the residue listed in the custom variant above.
        implicit_model_traj = mdtraj.load_pdb(implicit_model_filepath)
        resis = list(implicit_model_traj.top.residues)
        resi49_atom_strings = [str(atom) for atom in resis[49].atoms]
        assert 'ASP50-HD2' in resi49_atom_strings
def test_molprobity_validation():
    """Run ``molprobity_validation`` for one target and check its results.

    Each template's ``molprobity-refine_explicit_md.yaml`` must exist and
    its ``pct_badbonds`` value must equal the ``ref_pct_badbonds`` entry.
    The first two lines of the sorted validation scores file are compared
    with fixed expected strings.
    """
    target = "EGFR_HUMAN_D0"
    templates = ["KC1D_HUMAN_D0_4KB8_D", "KC1D_HUMAN_D0_4HNF_A"]
    with integrationtest_context("refined_explicit"):
        molprobity_validation(target)

        target_dirpath = os.path.join(default_project_dirnames.models, target)
        for template in templates:
            template_results_path = os.path.join(
                target_dirpath, template, "molprobity-refine_explicit_md.yaml"
            )
            assert os.path.exists(template_results_path)
            with open(template_results_path) as template_results_file:
                # NOTE(review): yaml.load is used without an explicit Loader.
                template_results = yaml.load(template_results_file)
            # ref_pct_badbonds is not defined in this function; presumably a
            # module-level mapping - TODO confirm.
            assert template_results.get("pct_badbonds") == ref_pct_badbonds[template]

        sorted_scores_path = os.path.join(
            target_dirpath, "validation_scores_sorted-molprobity-refine_explicit_md"
        )
        with open(sorted_scores_path) as sorted_scores_file:
            score_lines = sorted_scores_file.read().splitlines()
        assert score_lines[0] == "KC1D_HUMAN_D0_4KB8_D 3.828"
        assert score_lines[1] == "KC1D_HUMAN_D0_4HNF_A 4.035"
def test_gather_templates_from_pdb():
    """Check ``templates-resolved-seq.fa`` after gathering templates from 4KB8.

    ``ensembler.initproject.gather_templates_from_pdb`` is run for chains A
    and D of PDB entry 4KB8 with the domain regex '^Protein kinase'. The
    resulting FASTA file must equal the reference text.
    """
    # The content lines are unindented because the literal is compared
    # verbatim with the generated file.
    ref_templates_resolved_seq = """\
>KC1D_HUMAN_D0_4KB8_A
YRLGRKIGSGSFGDIYLGTDIAAGEEVAIKLECVKTKHPQLHIESKIYKMMQGGVGIPTI
RWCGAEGDYNVMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRD
VKPDNFLMGLGKKGNLVYIIDFGLAKKYGTARYASINTHLGIEQSRRDDLESLGYVLMYF
NLGSLPWQGLKERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFDDKPDYSYLRQLFR
NLF
>KC1D_HUMAN_D0_4KB8_D
YRLGRKIGDIYLGTDIAAGEEVAIKLECPQLHIESKIYKMMQGGVGIPTIRWCGAEGDYN
VMVMELLGPSLEDLFNFCSRKFSLKTVLLLADQMISRIEYIHSKNFIHRDVKPDNFLMGL
GKKGNLVYIIDFGLAKKYRDAQHIPYRENKNLTGTARYASINTHLGIEQSRRDDLESLGY
VLMYFNLGSLPWQGLKAATKRQKYERISEKKMSTPIEVLCKGYPSEFATYLNFCRSLRFD
DKPDYSYLRQLFRNLF
"""
    pdbids = ['4KB8']
    chainids = {'4KB8': ['A', 'D']}
    uniprot_domain_regex = '^Protein kinase'
    with integrationtest_context(set_up_project_stage='targets'):
        ensembler.initproject.gather_templates_from_pdb(
            pdbids, uniprot_domain_regex, chainids=chainids
        )

        fasta_path = os.path.join(
            ensembler.core.default_project_dirnames.templates,
            'templates-resolved-seq.fa',
        )
        # Context manager closes the handle instead of leaking it.
        with open(fasta_path) as fasta_file:
            resolved_seq_text = fasta_file.read()
        assert resolved_seq_text == ref_templates_resolved_seq
def test_align_command():
    """Check the outputs of the ``align`` CLI command for two targets.

    Each target's models directory must contain exactly as many
    ``alignment.pir`` files as there are templates, and each target's
    ``sequence-identities.txt`` must equal the reference copy under the
    installed ``tests/example_project`` resources.
    """
    ref_resources_dirpath = get_installed_resource_filename(
        os.path.join('tests', 'example_project')
    )
    with integrationtest_context(set_up_project_stage='templates_modeled_loops'):
        targets = ['KC1D_HUMAN_D0', 'EGFR_HUMAN_D0']
        templates = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
        cli_args = {
            '--targets': ','.join(targets),
            '--targetsfile': False,
            '--templates': ','.join(templates),
            '--templatesfile': False,
            '--verbose': False,
        }
        ensembler.cli_commands.align.dispatch(cli_args)

        for target in targets:
            target_models_dir = os.path.join(
                ensembler.core.default_project_dirnames.models, target
            )
            # Collect every alignment file found anywhere below the target.
            alignment_files = [
                filename
                for _root, _subdirs, filenames in os.walk(target_models_dir)
                for filename in filenames
                if filename == 'alignment.pir'
            ]
            assert len(alignment_files) == len(templates)

        for target in targets:
            seqid_relpath = os.path.join(
                ensembler.core.default_project_dirnames.models,
                target,
                'sequence-identities.txt',
            )
            with open(seqid_relpath) as actual_file:
                actual_text = actual_file.read()
            # The reference lives at the same relative path under the
            # installed example project.
            with open(os.path.join(ref_resources_dirpath, seqid_relpath)) as reference_file:
                reference_text = reference_file.read()
            print(actual_text)
            print(reference_text)
            assert actual_text == reference_text
def test_molprobity_validation():
    """Validate one target with MolProbity and check the result files.

    For every template the per-model YAML results file must exist and
    report the reference ``pct_badbonds`` value. The sorted scores file
    for the target must start with the two expected lines.
    """
    with integrationtest_context('refined_explicit'):
        target_id = 'EGFR_HUMAN_D0'
        molprobity_validation(target_id)

        for template_id in ('KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A'):
            results_filepath = os.path.join(
                default_project_dirnames.models,
                target_id,
                template_id,
                'molprobity-refine_explicit_md.yaml',
            )
            assert os.path.exists(results_filepath)
            with open(results_filepath) as results_file:
                # NOTE(review): yaml.load is used without an explicit Loader.
                results = yaml.load(results_file)
            # ref_pct_badbonds comes from outside this function; presumably a
            # module-level mapping keyed by template ID - TODO confirm.
            expected_pct_badbonds = ref_pct_badbonds[template_id]
            assert results.get('pct_badbonds') == expected_pct_badbonds

        scores_filepath = os.path.join(
            default_project_dirnames.models,
            target_id,
            'validation_scores_sorted-molprobity-refine_explicit_md',
        )
        with open(scores_filepath) as scores_file:
            scores = scores_file.read().splitlines()
        assert scores[0] == 'KC1D_HUMAN_D0_4KB8_D 3.828'
        assert scores[1] == 'KC1D_HUMAN_D0_4HNF_A 4.035'
def test_cluster_models():
    """Call ``ensembler.modeling.cluster_models`` in a 'modeled' project.

    No assertions are made; the test passes if the call does not raise.
    """
    project_stage = 'modeled'
    with integrationtest_context(set_up_project_stage=project_stage):
        ensembler.modeling.cluster_models()
def test_cluster_models():
    """Smoke-test ``cluster_models`` with its default arguments.

    Runs inside an integration-test context set up at the 'modeled' stage
    and asserts nothing afterwards.
    """
    context = integrationtest_context(set_up_project_stage='modeled')
    with context:
        ensembler.modeling.cluster_models()
def test_package_for_fah():
    """Check the directory layout and state file written by ``package_for_fah``.

    After packaging target EGFR_HUMAN_D0 with two templates, the project
    directory, its RUN0/RUN1 subdirectories and the expected files must
    exist. The kinetic energy stored in ``RUN0/state0.xml`` is compared
    with that of the reference explicit-refinement state.
    """
    target_id = 'EGFR_HUMAN_D0'
    with integrationtest_context(set_up_project_stage='refined_explicit'):
        package_for_fah(
            process_only_these_targets=[target_id],
            process_only_these_templates=[
                'KC1D_HUMAN_D0_4HNF_A',
                'KC1D_HUMAN_D0_4KB8_D',
            ],
        )

        base_path = os.path.join(
            default_project_dirnames.packaged_models, 'fah-projects', 'EGFR_HUMAN_D0'
        )

        def in_project(*parts):
            # Path of an entry below the packaged project directory.
            return os.path.join(base_path, *parts)

        assert os.path.exists(base_path)
        assert os.path.exists(in_project('RUN0'))
        assert os.path.exists(in_project('RUN1'))

        for shared_filename in ['system.xml', 'integrator.xml']:
            assert os.path.exists(in_project(shared_filename))

        expected_run_files = [
            'template.txt',
            'system.pdb',
            'protein.pdb',
            'sequence-identity.txt',
            'state0.xml',
        ]
        for run_number in range(2):
            for expected_run_file in expected_run_files:
                assert os.path.exists(
                    in_project('RUN{}'.format(run_number), expected_run_file)
                )

        # test whether kinetic energy in new state file is reasonable
        with open(in_project('RUN0', 'state0.xml')) as new_state_file:
            new_state = mm.XmlSerializer.deserialize(new_state_file.read())

        old_state_relpath = os.path.join(
            'tests', 'example_project', 'models',
            'EGFR_HUMAN_D0', 'KC1D_HUMAN_D0_4HNF_A', 'explicit-state.xml.gz',
        )
        old_state_filepath = get_installed_resource_filename(old_state_relpath)
        with gzip.open(old_state_filepath) as old_state_file:
            old_state = mm.XmlSerializer.deserialize(old_state_file.read())

        new_kinetic_energy = new_state.getKineticEnergy()
        old_kinetic_energy = old_state.getKineticEnergy()
        # "Reasonable" here means the deviation is below the reference itself.
        assert abs(new_kinetic_energy - old_kinetic_energy) < old_kinetic_energy
def test_mktraj_implicit_start():
    """Construct ``MkTrajImplicitStart`` for EGFR_HUMAN_D0 at debug log level.

    Runs in a 'refined_explicit' integration-test context and makes no
    assertions on the result.
    """
    constructor_kwargs = {"targetid": "EGFR_HUMAN_D0", "loglevel": "debug"}
    with integrationtest_context(set_up_project_stage="refined_explicit"):
        MkTrajImplicitStart(**constructor_kwargs)
def test_loopmodel_logs():
    """Create ``LoopmodelLogs`` and call ``add_missing_resis_data`` on it.

    Runs in a 'templates_modeled_loops' integration-test context; nothing
    is asserted about the outcome.
    """
    project_stage = 'templates_modeled_loops'
    with integrationtest_context(set_up_project_stage=project_stage):
        logs = LoopmodelLogs()
        logs.add_missing_resis_data()
def test_get_valid_model_filepaths():
    """Check that ``get_valid_model_ids`` only returns known template IDs.

    Queries the 'refine_implicit_md' stage for target EGFR_HUMAN_D0 in a
    'refined_implicit' integration-test context.
    """
    targetid = 'EGFR_HUMAN_D0'
    known_template_ids = ['KC1D_HUMAN_D0_4KB8_D', 'KC1D_HUMAN_D0_4HNF_A']
    with integrationtest_context('refined_implicit'):
        returned_ids = get_valid_model_ids('refine_implicit_md', targetid)
        membership_flags = []
        for returned_id in returned_ids:
            membership_flags.append(returned_id in known_template_ids)
        assert all(membership_flags)
def test_mktraj_implicit_start():
    """Smoke-test ``MkTrajImplicitStart`` on target EGFR_HUMAN_D0.

    The object is constructed with ``loglevel='debug'`` inside a
    'refined_explicit' project context and is not inspected further.
    """
    target = 'EGFR_HUMAN_D0'
    context = integrationtest_context(set_up_project_stage='refined_explicit')
    with context:
        MkTrajImplicitStart(targetid=target, loglevel='debug')
def test_loopmodel_logs():
    """Smoke-test ``LoopmodelLogs.add_missing_resis_data``.

    The project context is set up at the 'templates_modeled_loops' stage
    and the test asserts nothing after the call.
    """
    context = integrationtest_context(set_up_project_stage='templates_modeled_loops')
    with context:
        LoopmodelLogs().add_missing_resis_data()