def run_validation():
    """Run every validation YANK script found by ``../*/*.yaml``.

    Each matching YAML file is announced on stdout by its base name and
    then handed to a ``YamlBuilder`` whose experiments are run right away,
    one script after the other.

    Running each validation set on its own is probably the better option,
    because the optimal number of GPUs depends on the protocol.

    """
    script_pattern = os.path.join('..', '*', '*.yaml')
    for script_path in glob.glob(script_pattern):
        script_name = os.path.basename(script_path)
        print('Running {}...'.format(script_name))
        YamlBuilder(script_path).run_experiments()
def run_yank(job_id, n_jobs):
    """Run the YANK calculations assigned to this job.

    Molecules are discovered from the ``*_vacuum.pdb`` files in
    ``../pdbfiles`` and sorted, so that every job sees them in the same
    order. The molecule at sorted position ``i`` is handled by the job for
    which ``i % n_jobs == job_id - 1``. Molecules already reported by
    ``read_status()`` are skipped.

    For each remaining molecule, ``yank_template.yaml`` is filled in with
    the experiment directory and the solvated (phase 1) and vacuum
    (phase 2) system/PDB file pairs. The resulting script is run through
    ``YamlBuilder`` and the molecule is then recorded via
    ``update_status()``.

    Parameters
    ----------
    job_id : int
        Index of this job; compared against ``i % n_jobs + 1``, so it is
        1-based.
    n_jobs : int
        Total number of jobs the molecules are divided among.

    """
    vacuum_suffix = '_vacuum.pdb'
    openmm_system_dir = os.path.join('..', 'openmmfiles')
    pdb_dir = os.path.join('..', 'pdbfiles')

    # The template is read once and formatted per molecule below.
    with open('yank_template.yaml', 'r') as template_file:
        script_template = template_file.read()

    # Molecules completed by previous runs.
    molecules_done = read_status()

    # Strip the suffix from each vacuum PDB name to obtain the molecule ID.
    # Sorting gives all jobs an identical ordering, so the modulo split
    # below never assigns one molecule to two jobs.
    vacuum_pdb_paths = glob.glob(os.path.join(pdb_dir, '*' + vacuum_suffix))
    molecule_ids = sorted(
        os.path.basename(pdb_path)[:-len(vacuum_suffix)]
        for pdb_path in vacuum_pdb_paths
    )

    for index, molecule_id in enumerate(molecule_ids):
        # Only run molecules that belong to this job and are not done yet.
        assigned_here = index % n_jobs == job_id - 1
        if not (assigned_here and molecule_id not in molecules_done):
            print_and_flush('Node {}: Skipping {}'.format(job_id, molecule_id))
            continue

        # Collect the [system XML, PDB] file pair of each phase.
        phase_files = {}
        for phase in ('solvated', 'vacuum'):
            stem = molecule_id + '_' + phase
            phase_files[phase] = [
                os.path.join(openmm_system_dir, stem + '.xml'),
                os.path.join(pdb_dir, stem + '.pdb'),
            ]

        # The file pairs are inserted into the template as the string
        # representation of a Python list.
        script = script_template.format(
            experiment_dir=molecule_id,
            phase1_path=str(phase_files['solvated']),
            phase2_path=str(phase_files['vacuum']),
        )

        print_and_flush('Node {}: Running {}'.format(job_id, molecule_id))
        YamlBuilder(script).run_experiments()

        # Record completion so a later run skips this molecule.
        update_status(molecule_id)