def process(self, mol, port):
    """Run a YANK hydration free energy calculation for a single molecule.

    The molecule is written to a temporary directory as ``input.mol2``,
    YANK experiments are built from ``self.construct_yaml``, and the two
    resulting NetCDF stores are analyzed. On success the estimate and its
    uncertainty are attached to the molecule as SD data (scaled by kT
    expressed in kcal/mol) and the molecule is emitted on ``self.success``.
    On any exception the traceback is logged, the error message is attached
    to the molecule, and the molecule is emitted on ``self.failure``.

    Parameters
    ----------
    mol : OpenEye molecule
        Charged molecule to process; modified in place.
    port : object
        Not used by this implementation.
    """
    kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

    # Retrieve data about which molecule we are processing
    title = mol.GetTitle()

    with TemporaryDirectory() as output_directory:
        try:
            # Print out which molecule we are processing
            self.log.info('Processing {} in directory {}.'.format(title, output_directory))

            # Check that molecule is charged.
            if not molecule_is_charged(mol):
                raise Exception('Molecule %s has no charges; input molecules must be charged.' % title)

            # Write the specified molecule out to a mol2 file without changing its name.
            mol2_filename = os.path.join(output_directory, 'input.mol2')
            ofs = oechem.oemolostream(mol2_filename)
            try:
                oechem.OEWriteMol2File(ofs, mol)
            finally:
                # Close the stream so the file is fully flushed before it is
                # re-read and rewritten below.
                ofs.close()

            # Undo oechem fuckery with naming mol2 substructures `<0>`
            from YankCubes.utils import unfuck_oechem_mol2_file
            unfuck_oechem_mol2_file(mol2_filename)

            # Run YANK on the specified molecule.
            from yank.yamlbuild import YamlBuilder
            yaml = self.construct_yaml(output_directory=output_directory)
            yaml_builder = YamlBuilder(yaml)
            yaml_builder.build_experiments()
            self.log.info('Ran Yank experiments for molecule {}.'.format(title))

            # Analyze the hydration free energy.
            from yank.analyze import estimate_free_energies

            def _estimate(store_name):
                # Open one experiment store, analyze it, and always release
                # the file handle before the temporary directory is removed.
                ncfile = netcdf.Dataset(output_directory + '/experiments/' + store_name, 'r')
                try:
                    return estimate_free_energies(ncfile)
                finally:
                    ncfile.close()

            (Deltaf_ij_solvent, dDeltaf_ij_solvent) = _estimate('solvent1.nc')
            (Deltaf_ij_vacuum, dDeltaf_ij_vacuum) = _estimate('solvent2.nc')
            DeltaG_hydration = Deltaf_ij_vacuum[0, -1] - Deltaf_ij_solvent[0, -1]
            # Combine the two *uncertainties* in quadrature (the previous code
            # mistakenly squared the free energy estimates themselves).
            dDeltaG_hydration = np.sqrt(dDeltaf_ij_vacuum[0, -1]**2 + dDeltaf_ij_solvent[0, -1]**2)

            # Add result to original molecule
            # NOTE(review): assumes estimate_free_energies reports values in
            # units of kT, hence the conversion factor -- confirm.
            oechem.OESetSDData(mol, 'DeltaG_yank_hydration', str(DeltaG_hydration * kT_in_kcal_per_mole))
            oechem.OESetSDData(mol, 'dDeltaG_yank_hydration', str(dDeltaG_hydration * kT_in_kcal_per_mole))
            self.log.info('Analyzed and stored hydration free energy for molecule {}.'.format(title))

            # Emit molecule to success port.
            self.success.emit(mol)

        except Exception as e:
            self.log.info('Exception encountered when processing molecule {}.'.format(title))
            # Attach error message to the molecule that failed
            # TODO: If there is an error in the leap setup log,
            # we should capture that and attach it to the failed molecule.
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed molecule
            self.failure.emit(mol)
def run_validation():
    """Run every validation YAML script found one directory level up.

    All files matching ``../*/*.yaml`` are handed to ``YamlBuilder`` in turn
    and their experiments are run.

    It is usually preferable to run each validation set on its own, because
    the optimal number of GPUs depends on the protocol.
    """
    script_pattern = os.path.join('..', '*', '*.yaml')
    for script_path in glob.glob(script_pattern):
        script_name = os.path.basename(script_path)
        print('Running {}...'.format(script_name))
        YamlBuilder(script_path).run_experiments()
def run_yank(job_id, n_jobs):
    """Run the YANK calculations assigned to this job.

    Molecules are discovered from ``../pdbfiles/*_vacuum.pdb``, sorted, and
    distributed round-robin: the molecule at sorted position ``i`` belongs to
    the job for which ``i % n_jobs == job_id - 1``. Molecules already listed
    by ``read_status()`` are skipped. For each remaining molecule a YANK
    script is rendered from ``yank_template.yaml``, run, and then recorded
    through ``update_status``.

    Parameters
    ----------
    job_id : int
        Identifier of this job; the assignment test uses ``job_id - 1``.
    n_jobs : int
        Total number of jobs sharing the work.
    """
    pdb_dir = os.path.join('..', 'pdbfiles')
    openmm_system_dir = os.path.join('..', 'openmmfiles')

    # Read in YANK template script.
    with open('yank_template.yaml', 'r') as template_file:
        script_template = template_file.read()

    # Load cached status calculations.
    molecules_done = read_status()

    # Find all molecules to run. Sorting guarantees that every node sees the
    # same ordering, so parallel nodes won't make the same calculation.
    vacuum_suffix = '_vacuum.pdb'
    vacuum_pattern = os.path.join(pdb_dir, '*' + vacuum_suffix)
    molecule_ids = sorted(os.path.basename(pdb_path)[:-len(vacuum_suffix)]
                          for pdb_path in glob.glob(vacuum_pattern))

    for index, molecule_id in enumerate(molecule_ids):
        # Skip molecules that belong to another job or are already done.
        assigned_here = index % n_jobs == job_id - 1
        if not assigned_here or molecule_id in molecules_done:
            print_and_flush('Node {}: Skipping {}'.format(job_id, molecule_id))
            continue

        # Input file paths for the two phases.
        solvated_stem = molecule_id + '_solvated'
        vacuum_stem = molecule_id + '_vacuum'
        solvated_files = [os.path.join(openmm_system_dir, solvated_stem + '.xml'),
                          os.path.join(pdb_dir, solvated_stem + '.pdb')]
        vacuum_files = [os.path.join(openmm_system_dir, vacuum_stem + '.xml'),
                        os.path.join(pdb_dir, vacuum_stem + '.pdb')]

        # Create yank script.
        script = script_template.format(experiment_dir=molecule_id,
                                        phase1_path=str(solvated_files),
                                        phase2_path=str(vacuum_files))

        # Run YANK.
        print_and_flush('Node {}: Running {}'.format(job_id, molecule_id))
        YamlBuilder(script).run_experiments()

        # Update completed molecules.
        update_status(molecule_id)
def dispatch(args):
    """
    Set up and run YANK calculation from a script.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       True when a ``--yaml`` source was given and the experiment was built,
       False otherwise.

    """
    yaml_source = args['--yaml']
    # Nothing to do without a YAML source.
    if not yaml_source:
        return False

    builder = YamlBuilder(yaml_source=yaml_source)
    builder.build_experiment()
    return True
def dispatch(args):
    """
    Set up and run YANK calculation from a script.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       False if no ``--yaml`` argument was supplied; True once the
       experiment described by it has been built.

    """
    script_source = args['--yaml']
    if script_source:
        YamlBuilder(yaml_source=script_source).build_experiment()
        handled = True
    else:
        handled = False
    return handled
def dispatch(args):
    """
    Set up and run YANK calculation from a script.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       True when a ``--yaml`` path was given and its experiments were built,
       False when no ``--yaml`` argument was supplied.

    Raises
    ------
    ValueError
       If the ``--yaml`` path does not point to an existing file.

    """
    yaml_path = args['--yaml']
    if not yaml_path:
        return False

    # Fail early with a clear message rather than inside the builder.
    if not os.path.isfile(yaml_path):
        raise ValueError('Cannot find YAML script "{}"'.format(yaml_path))

    YamlBuilder(yaml_source=yaml_path).build_experiments()
    return True
def process(self, mol, port):
    """Run a YANK binding free energy calculation for a single ligand.

    ``self.receptor`` is written to ``receptor.pdb`` and the ligand to
    ``input.mol2`` inside a temporary directory, YANK experiments are built
    from ``self.construct_yaml``, and the ``experiments`` store directory is
    analyzed. On success the estimate and its uncertainty are attached to the
    molecule as SD data (scaled by kT expressed in kcal/mol) and the molecule
    is emitted on ``self.success``. On any exception the traceback is logged,
    the error message is attached to the molecule, and the molecule is
    emitted on ``self.failure``.

    Parameters
    ----------
    mol : OpenEye molecule
        Charged ligand to process; modified in place.
    port : object
        Not used by this implementation.
    """
    kT_in_kcal_per_mole = self.kT.value_in_unit(unit.kilocalories_per_mole)

    # Retrieve data about which molecule we are processing
    title = mol.GetTitle()

    with TemporaryDirectory() as output_directory:
        try:
            # Print out which molecule we are processing
            self.log.info('Processing {} in {}.'.format(
                title, output_directory))

            # Check that molecule is charged.
            if not molecule_is_charged(mol):
                raise Exception(
                    'Molecule %s has no charges; input molecules must be charged.'
                    % title)

            # Write the receptor.
            pdbfilename = os.path.join(output_directory, 'receptor.pdb')
            with oechem.oemolostream(pdbfilename) as ofs:
                res = oechem.OEWriteConstMolecule(ofs, self.receptor)
                if res != oechem.OEWriteMolReturnCode_Success:
                    raise RuntimeError(
                        "Error writing receptor: {}".format(res))

            # Write the specified molecule out to a mol2 file without changing its name.
            # The stream is closed (like the receptor stream above) so the
            # file is complete before it is re-read and rewritten below.
            mol2_filename = os.path.join(output_directory, 'input.mol2')
            with oechem.oemolostream(mol2_filename) as ofs:
                oechem.OEWriteMol2File(ofs, mol)

            # Undo oechem fuckery with naming mol2 substructures `<0>`
            from YankCubes.utils import unfuck_oechem_mol2_file
            unfuck_oechem_mol2_file(mol2_filename)

            # Run YANK on the specified molecule.
            from yank.yamlbuild import YamlBuilder
            yaml = self.construct_yaml(output_directory=output_directory)
            yaml_builder = YamlBuilder(yaml)
            yaml_builder.build_experiments()
            self.log.info(
                'Ran Yank experiments for molecule {}.'.format(title))

            # Analyze the binding free energy
            # TODO: Use yank.analyze API for this
            from YankCubes.analysis import analyze
            store_directory = os.path.join(output_directory, 'experiments')
            [DeltaG_binding, dDeltaG_binding] = analyze(store_directory)

            # Disabled debug snippet (previously a bare string literal), kept
            # for reference: extract the trajectory and load it as a
            # multi-conformer molecule.
            #
            # from yank.analyze import extract_trajectory
            # trajectory_filename = 'trajectory.pdb'
            # store_filename = os.path.join(store_directory, 'complex.pdb')
            # extract_trajectory(trajectory_filename, store_filename,
            #                    state_index=0, keep_solvent=False,
            #                    discard_equilibration=True, image_molecules=True)
            # ifs = oechem.oemolistream(trajectory_filename)
            # ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())  # load multi-conformer molecule
            # mol = oechem.OEMol()
            # for mol in ifs.GetOEMols():
            #     print (mol.GetTitle(), "has", mol.NumConfs(), "conformers")
            # ifs.close()
            # os.remove(trajectory_filename)

            # Attach binding free energy estimates to molecule
            # NOTE(review): assumes analyze() reports values in units of kT,
            # hence the conversion factor -- confirm.
            oechem.OESetSDData(mol, 'DeltaG_yank_binding',
                               str(DeltaG_binding * kT_in_kcal_per_mole))
            oechem.OESetSDData(mol, 'dDeltaG_yank_binding',
                               str(dDeltaG_binding * kT_in_kcal_per_mole))
            self.log.info(
                'Analyzed and stored binding free energy for molecule {}.'.
                format(title))

            # Emit molecule to success port.
            self.success.emit(mol)

        except Exception as e:
            self.log.info(
                'Exception encountered when processing molecule {}.'.
                format(title))
            # Attach error message to the molecule that failed
            # TODO: If there is an error in the leap setup log,
            # we should capture that and attach it to the failed molecule.
            self.log.error(traceback.format_exc())
            mol.SetData('error', str(e))
            # Return failed molecule
            self.failure.emit(mol)
def dispatch_binding(args):
    """
    Set up a binding free energy calculation.

    Parameters
    ----------
    args : dict
       Command-line arguments from docopt.

    Returns
    -------
    bool
       True once the simulation has been created; False when neither the
       'amber' nor the 'gromacs' setup command was selected.

    Raises
    ------
    Exception
       If ``--precision`` is given without a platform, if the requested
       precision is not supported by the CPU or Reference platform, or if
       the platform name is not one this function knows how to configure.

    """
    verbose = args['--verbose']
    store_dir = args['--store']
    utils.config_root_logger(verbose, log_file_path=os.path.join(store_dir, 'prepare.log'))

    #
    # Determine simulation options.
    #

    # Specify thermodynamic parameters.
    temperature = process_unit_bearing_arg(args, '--temperature', unit.kelvin)
    pressure = process_unit_bearing_arg(args, '--pressure', unit.atmospheres)
    thermodynamic_state = ThermodynamicState(temperature=temperature, pressure=pressure)

    # Create systems according to specified setup/import method.
    if args['amber']:
        [phases, systems, positions, atom_indices] = setup_binding_amber(args)
    elif args['gromacs']:
        [phases, systems, positions, atom_indices] = setup_binding_gromacs(args)
    else:
        # The message lists the setup commands this function actually handles.
        logger.error("No valid binding free energy calculation setup command specified: Must be one of ['amber', 'gromacs'].")
        # Trigger help argument to be returned.
        return False

    # Report some useful properties.
    if verbose:
        if 'complex-explicit' in atom_indices:
            phase = 'complex-explicit'
        else:
            phase = 'complex-implicit'
        logger.info("TOTAL ATOMS      : %9d" % len(atom_indices[phase]['complex']))
        logger.info("receptor         : %9d" % len(atom_indices[phase]['receptor']))
        logger.info("ligand           : %9d" % len(atom_indices[phase]['ligand']))
        if phase == 'complex-explicit':
            logger.info("solvent and ions : %9d" % len(atom_indices[phase]['solvent']))

    # Set options.
    options = dict()
    if args['--nsteps']:
        options['nsteps_per_iteration'] = int(args['--nsteps'])
    if args['--iterations']:
        options['number_of_iterations'] = int(args['--iterations'])
    if args['--equilibrate']:
        options['number_of_equilibration_iterations'] = int(args['--equilibrate'])
    if args['--online-analysis']:
        options['online_analysis'] = True
    if args['--restraints']:
        options['restraint_type'] = args['--restraints']
    if args['--randomize-ligand']:
        options['randomize_ligand'] = True
    if args['--minimize']:
        options['minimize'] = True

    # Allow platform to be optionally specified in order for alchemical tests to be carried out.
    platform_name = args['--platform']
    platform_specified = platform_name not in [None, 'None']
    if platform_specified:
        options['platform'] = openmm.Platform.getPlatformByName(platform_name)

    precision = args['--precision']
    if precision:
        # We need to modify the Platform object, so one must have been
        # selected above ('None' counts as unspecified, as it does there).
        if not platform_specified:
            raise Exception("The --platform argument must be specified in order to specify platform precision.")

        # Set platform precision.
        # Both values are passed as logging arguments; applying '%' to a
        # single value with two placeholders raised TypeError.
        logger.info("Setting %s platform to use precision model '%s'.", platform_name, precision)
        if platform_name == 'CUDA':
            options['platform'].setPropertyDefaultValue('CudaPrecision', precision)
        elif platform_name == 'OpenCL':
            options['platform'].setPropertyDefaultValue('OpenCLPrecision', precision)
        elif platform_name == 'CPU':
            if precision != 'mixed':
                raise Exception("CPU platform does not support precision model '%s'; only 'mixed' is supported." % precision)
        elif platform_name == 'Reference':
            if precision != 'double':
                raise Exception("Reference platform does not support precision model '%s'; only 'double' is supported." % precision)
        else:
            raise Exception("Platform selection logic is outdated and needs to be updated to add platform '%s'." % platform_name)

    # Parse YAML options, CLI options have priority: start from the YAML
    # values and let anything given on the command line overwrite them.
    if args['--yaml']:
        merged_options = dict(YamlBuilder(args['--yaml']).yank_options)
        merged_options.update(options)
        options = merged_options

    # Create new simulation.
    yank = Yank(store_dir, **options)
    yank.create(phases, systems, positions, atom_indices, thermodynamic_state)

    # Report success.
    return True
def test_protein_ligand_restraints():
    """Test the restraints in a protein:ligand system.

    For every restraint type reported by
    ``yank.restraints.available_restraint_types()`` a short T4 lysozyme /
    p-xylene vacuum experiment is built in a fresh temporary directory, which
    is removed afterwards even if the build fails.
    """
    # Imported locally so the reference to ``yank.restraints`` below does not
    # depend on a module-level ``import yank``.
    import yank.restraints
    from yank.yamlbuild import YamlBuilder
    from yank.utils import get_data_filename

    yaml_script = """
---
options:
    minimize: no
    verbose: no
    output_dir: %(output_directory)s
    number_of_iterations: 2
    nsteps_per_iteration: 10
    temperature: 300*kelvin

molecules:
    T4lysozyme:
        filepath: %(receptor_filepath)s
    p-xylene:
        filepath: %(ligand_filepath)s
        antechamber:
            charge_method: bcc

solvents:
    vacuum:
        nonbonded_method: NoCutoff

systems:
    lys-pxyl:
        receptor: T4lysozyme
        ligand: p-xylene
        solvent: vacuum
        leap:
            parameters: [oldff/leaprc.ff14SB, leaprc.gaff]

protocols:
    absolute-binding:
        complex:
            alchemical_path:
                lambda_restraints:     [0.0, 0.5, 1.0]
                lambda_electrostatics: [1.0, 1.0, 1.0]
                lambda_sterics:        [1.0, 1.0, 1.0]
        solvent:
            alchemical_path:
                lambda_electrostatics: [1.0, 1.0, 1.0]
                lambda_sterics:        [1.0, 1.0, 1.0]

experiments:
    system: lys-pxyl
    protocol: absolute-binding
    restraint:
        type: %(restraint_type)s
"""

    # Test all possible restraint types.
    available_restraint_types = yank.restraints.available_restraint_types()
    for restraint_type in available_restraint_types:
        print('***********************************')
        print('Testing %s restraints...' % restraint_type)
        print('***********************************')
        output_directory = tempfile.mkdtemp()
        try:
            data = {
                'output_directory': output_directory,
                'restraint_type': restraint_type,
                'receptor_filepath': get_data_filename(
                    'tests/data/p-xylene-implicit/181L-pdbfixer.pdb'),
                'ligand_filepath': get_data_filename(
                    'tests/data/p-xylene-implicit/p-xylene.mol2'),
            }
            # run both setup and experiment
            yaml_builder = YamlBuilder(yaml_script % data)
            yaml_builder.build_experiments()
        finally:
            # Clean up, even when the build raised, so failed runs do not
            # leave temporary directories behind.
            shutil.rmtree(output_directory)