def download_dataset_to_file(dataset_id):
    """
    Return a local file name for a data set, downloading it from Orion if needed.

    Outside Orion ``dataset_id`` is returned unchanged. Inside Orion the
    result is looked up in (and stored into) ``download_cache`` so that each
    data set is downloaded at most once per process.

    Parameters
    ----------
    dataset_id:
        Either the identifier of an Orion data set or the path of a file
        that already exists on the local machine.

    Returns
    -------
    The path of a local file: ``dataset_id`` itself when it is not running
    in Orion or when it names an existing file, otherwise the name of a
    temporary ``.oeb.gz`` file holding the downloaded data set.
    """
    if not in_orion():
        return dataset_id

    if dataset_id in download_cache:
        return download_cache[dataset_id]

    if os.path.isfile(dataset_id):
        download_cache[dataset_id] = dataset_id
        return dataset_id

    # Only the *name* of the temporary file is needed. Close the handle right
    # away so that no file descriptor is leaked and the download does not
    # write to a path that is still held open by this process. delete=False
    # keeps the file on disk after the handle is closed.
    with NamedTemporaryFile(suffix=".oeb.gz", delete=False) as tmp:
        tmp_name = tmp.name

    stream = StreamingDataset(dataset_id, input_format=".oeb.gz")
    stream.download_to_file(tmp_name)
    download_cache[dataset_id] = tmp_name

    return tmp_name
def dump(cls, molecule, tags=None, outfname=None, tarxz=True):
    """
    Write the data attached to an OEMol to files on disk and, optionally,
    bundle them into an xz-compressed tar archive.

    Parameters
    ----------
    molecule:
        The molecule whose attached data is unpacked via ``cls.unpack`` and
        whose SD data is read via ``cls.checkSDData``.
    tags:
        Optional tags forwarded to ``cls.unpack``; when None every tag
        handled by ``cls.unpack`` is used.
    outfname: str
        Prefix used to build all the output file names. Required.
    tarxz: bool
        When True the produced files (plus ``<outfname>.nc`` if it exists)
        are added to ``<outfname>.tar.xz``, the archive is uploaded when
        running in Orion, and the archived files are passed to ``cleanup``.

    Raises
    ------
    ValueError
        If ``outfname`` is None.
    """
    # Validate first: nothing useful can be written without a file prefix,
    # so fail before doing any unpacking work.
    if outfname is None:
        raise ValueError('Require an output file name.')

    if tags is None:
        tag_data = cls.unpack(molecule)
    else:
        tag_data = cls.unpack(molecule, tags=tags)

    totar = []

    # Dump the SD Data
    sd_data = cls.checkSDData(molecule)
    sdtxt = outfname + '-sd.txt'
    with open(sdtxt, 'w') as f:
        f.writelines('{} : {}\n'.format(k, v) for k, v in sd_data.items())
    totar.append(sdtxt)

    print('Dumping data from: %s' % outfname)
    for data in tag_data.values():
        if isinstance(data, parmed.structure.Structure):
            pdbfname = outfname + '.pdb'
            print("\tStructure to %s" % pdbfname)
            data.save(pdbfname, overwrite=True)
            totar.append(pdbfname)
        if isinstance(data, openmm.openmm.State):
            statefname = outfname + '-state.xml'
            print('\tState to %s' % statefname)
            with open(statefname, 'w') as f:
                f.write(openmm.XmlSerializer.serialize(data))
            if tarxz:
                totar.append(statefname)
        if isinstance(data, io.StringIO):
            enelog = outfname + '.log'
            print('\tLog to %s' % enelog)
            with open(enelog, 'w') as f:
                f.write(data.getvalue())
            if tarxz:
                totar.append(enelog)

    if tarxz:
        tarname = outfname + '.tar.xz'
        print('Creating tarxz file: {}'.format(tarname))

        trajfname = outfname + '.nc'
        if os.path.isfile(trajfname):
            totar.append(trajfname)
            print('Adding {} to {}'.format(trajfname, tarname))
        else:
            print('Could not find {}'.format(trajfname))

        # The context manager closes the archive even if adding a member fails.
        with tarfile.open(tarname, "w:xz") as tar:
            for name in totar:
                tar.add(name)

        if in_orion():
            # MUST upload tar file directly back to Orion or they disappear.
            upload_file(tarname, tarname, tags=['TAR'])

        # Clean up files that have been added to tar.
        cleanup(totar)
class Fields:
    """
    Namespace collecting the OEField definitions used on the MD records.

    The class is never instantiated here: every attribute is a field
    declaration evaluated once at class-creation time. Related fields are
    grouped in the nested namespaces ``Analysis``, ``FEC``, ``FEC.RBFEC``
    and ``FEC.RBFEC.NESC``.
    """

    # The LigInitialRecord Field is for the initial ligand record read in at the start
    ligInit_rec = OEField("LigInitial", Types.Record, meta=_metaHidden)

    # The Title field is a string name for the flask which is used to compose file names
    title = OEField("Title_OPLMD", Types.String, meta=_metaIDHidden)

    # The flaskid field is a unique integer for each flask (final system for simulation)
    flaskid = OEField("FlaskID_OPLMD", Types.Int, meta=_metaIDHidden)

    # The ligid field is a unique integer used to keep track of the ligand input order
    ligid = OEField("LigID_OPLMD", Types.Int, meta=_metaIDHidden)

    # The ConfID field is used to identify a particular conformer
    confid = OEField("ConfID_OPLMD", Types.Int, meta=_metaIDHidden)

    # The Ligand field should be used to save in a record a ligand as an OEMolecule
    ligand = OEField(
        "Ligand_OPLMD", Types.Chem.Mol, meta=OEFieldMeta(
            options=[Meta.Hints.Chem.Ligand, Meta.Display.Hidden]))

    # The ligand name
    ligand_name = OEField("Ligand_name_OPLMD", Types.String, meta=_metaHidden)

    # The protein field should be used to save in a record a Protein as an OEMolecule
    protein = OEField("Protein_OPLMD", Types.Chem.Mol, meta=_metaProtHidden)

    # The protein name
    protein_name = OEField("Protein_name_OPLMD", Types.String, meta=_metaHidden)

    # The super-molecule for the entire flask (ie the final system for simulation)
    flask = OEField("Flask_OPLMD", Types.Chem.Mol, meta=_metaHidden)

    # Primary Molecule
    primary_molecule = OEPrimaryMolField()

    # Parmed Structure, Trajectory, MDData and Protein trajectory conformers Fields.
    # The same field names are declared with different types depending on
    # whether the code runs in Orion (integers) or locally.
    # NOTE(review): the Orion integers are presumably IDs of data stored
    # outside the record - confirm against the code that reads these fields.
    if in_orion():
        pmd_structure = OEField('Structure_Parmed_OPLMD', Types.Int, meta=_metaHidden)
        trajectory = OEField("Trajectory_OPLMD", Types.Int, meta=_metaHidden)
        mddata = OEField("MDData_OPLMD", Types.Int, meta=_metaHidden)
        protein_traj_confs = OEField("ProtTraj_OPLMD", Types.Int, meta=_metaHidden)
    else:
        pmd_structure = OEField('Structure_Parmed_OPLMD', ParmedData, meta=_metaHidden)
        trajectory = OEField("Trajectory_OPLMD", Types.String, meta=_metaHidden)
        mddata = OEField("MDData_OPLMD", Types.String, meta=_metaHidden)
        protein_traj_confs = OEField("ProtTraj_OPLMD", Types.Chem.Mol, meta=_metaHidden)

    # The Stage Name
    stage_name = OEField('Stage_name_OPLMD', Types.String)

    # The Stage Type
    stage_type = OEField('Stage_type_OPLMD', Types.String)

    # Topology Field
    topology = OEField('Topology_OPLMD', Types.Chem.Mol, meta=OEFieldMeta().set_option(
        Meta.Hints.Chem.PrimaryMol))

    # Log Info
    log_data = OEField('Log_data_OPLMD', Types.String)

    # MD State
    md_state = OEField("MDState_OPLMD", MDStateData)

    # Design Unit Field
    design_unit = OEField('Design_Unit_OPLMD', DesignUnit)

    # Design Unit Field from Spruce
    # design_unit_from_spruce = OEField('du_single', Types.Blob)
    design_unit_from_spruce = OEField('designunit', Types.Chem.DesignUnit)

    # MD Components
    md_components = OEField('MDComponents_OPLMD', MDComponentData)

    # Collection is used to offload data from the record which must be < 100Mb
    collection = OEField("Collection_ID_OPLMD", Types.Int, meta=_metaHidden)

    # Stage list Field
    md_stages = OEField("MDStages_OPLMD", Types.RecordVec, meta=_metaHidden)

    # Floe report fields
    floe_report = OEField('Floe_report_OPLMD', Types.String, meta=_metaHidden)

    floe_report_svg_lig_depiction = OEField("Floe_report_lig_svg_OPLMD", Types.String,
                                            meta=OEFieldMeta().set_option(
                                                Meta.Hints.Image_SVG))

    floe_report_label = OEField('Floe_report_label_OPLMD', Types.String, meta=_metaHidden)

    floe_report_URL = OEField('Floe_report_URL_OPLMD', Types.String,
                              meta=OEFieldMeta(options=[Meta.Hints.URL]))

    floe_report_collection_id = OEField('Floe_report_ID_OPLMD', Types.Int, meta=_metaHidden)

    class Analysis:
        """Fields holding the results of the trajectory analysis."""

        # The poseIdVec vector addresses an input poseid for each traj frame
        poseIdVec = OEField("PoseIdVec", Types.IntVec, meta=_metaHidden)

        # The OETraj Field is for the record containing Traj OEMols and energies
        oetraj_rec = OEField("OETraj", Types.Record, meta=_metaHidden)

        # The TrajIntE Field is for the record containing Traj interaction energies
        oeintE_rec = OEField("TrajIntE", Types.Record, meta=_metaHidden)

        # The TrajIntEDict Field is for the POD Dictionary containing Traj interaction energies
        oeintE_dict = OEField("TrajIntEDict", Types.JSONObject, meta=_metaHidden)

        # The TrajPBSA Field is for the record containing Traj PBSA energies
        oepbsa_rec = OEField("TrajPBSA", Types.Record, meta=_metaHidden)

        # The TrajPBSADict Field is for the POD Dictionary containing Traj PBSA energies
        oepbsa_dict = OEField("TrajPBSADict", Types.JSONObject, meta=_metaHidden)

        # The TrajClus Field is for the record containing Traj ligand clustering results
        oeclus_rec = OEField("TrajClus", Types.Record, meta=_metaHidden)

        # The TrajClusDict Field is for the POD Dictionary containing Traj ligand clustering results
        oeclus_dict = OEField("TrajClusDict", Types.JSONObject, meta=_metaHidden)

        # The ClusPopDict Field is for the POD Dictionary containing conf/cluster population results
        cluspop_dict = OEField("ClusPopDict", Types.JSONObject, meta=_metaHidden)

        # The AnalysesDone Field is for a list of the analyses that have been done
        analysesDone = OEField("AnalysesDone", Types.StringVec, meta=_metaHidden)

        # The Lig_Conf_Data Field is for the record containing Traj conf data for all confs
        oetrajconf_rec = OEField("Lig_Conf_Data", Types.RecordVec, meta=_metaHidden)

        # The vector of ligand Traj RMSDs from the initial pose
        lig_traj_rmsd = OEField('LigTrajRMSD', Types.FloatVec,
                                meta=OEFieldMeta().set_option(
                                    Meta.Units.Length.Ang))

        # The mmpbsa Field contains the vector of per-frame mmpbsa values over the whole trajectory
        zapMMPBSA_fld = OEField("OEZap_MMPBSA6_Bind", Types.FloatVec,
                                meta=OEFieldMeta().set_option(
                                    Meta.Units.Energy.kCal))

        # mmpbsa ensemble average over the whole trajectory
        mmpbsa_traj_mean = OEField('MMPBSATrajMean', Types.Float,
                                   meta=OEFieldMeta().set_option(
                                       Meta.Units.Energy.kCal_per_mol))

        # Metadata marking the next field as the error of mmpbsa_traj_mean.
        # It must be built after mmpbsa_traj_mean, which it references.
        metaMMPBSA_traj_serr = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaMMPBSA_traj_serr.add_relation(Meta.Relations.ErrorsFor,
                                          mmpbsa_traj_mean)
        mmpbsa_traj_serr = OEField('MMPBSATrajSerr', Types.Float, meta=metaMMPBSA_traj_serr)

        # The number of major clusters found
        n_major_clusters = OEField("n major clusters", Types.Int)

        # Trajectory cluster averages and medians of protein and ligand
        ClusLigAvg_fld = OEField('ClusLigAvgMol', Types.Chem.Mol)
        ClusProtAvg_fld = OEField('ClusProtAvgMol', Types.Chem.Mol)
        ClusLigMed_fld = OEField('ClusLigMedMol', Types.Chem.Mol)
        ClusProtMed_fld = OEField('ClusProtMedMol', Types.Chem.Mol)

        max_waters = OEField("MaxWaters_OPLMD", Types.Int, meta=_metaHidden)

        # Free Energy Yank
        # Analysis Fields
        # These use the same field names ('FE_OPLMD', 'FE_Error_OPLMD') as
        # the free energy fields declared in Fields.FEC.
        free_energy = OEField('FE_OPLMD', Types.Float,
                              meta=OEFieldMeta().set_option(
                                  Meta.Units.Energy.kCal_per_mol))

        # Metadata marking the next field as the error of free_energy
        metaFreeEnergy_err = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaFreeEnergy_err.add_relation(Meta.Relations.ErrorsFor, free_energy)
        free_energy_err = OEField('FE_Error_OPLMD', Types.Float, meta=metaFreeEnergy_err)

    class FEC:
        """Free energy calculation fields."""

        # Free Energy
        free_energy = OEField('FE_OPLMD', Types.Float,
                              meta=OEFieldMeta().set_option(
                                  Meta.Units.Energy.kCal_per_mol))

        # Metadata marking the next field as the error of free_energy
        metaFreeEnergy_err = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaFreeEnergy_err.add_relation(Meta.Relations.ErrorsFor, free_energy)
        free_energy_err = OEField('FE_Error_OPLMD', Types.Float, meta=metaFreeEnergy_err)

        class RBFEC:
            """Fields for the relative free energy calculations."""

            # Oriented Edge field for relative free energy calculations
            # The first integer of the list is the ligand ID of the starting
            # thermodynamic state and the second the final one
            # NOTE(review): the comment above speaks of a list of integers
            # but the field is declared as a single Types.Int - confirm.
            edgeid = OEField("EdgeID_OPLMD", Types.Int, meta=_metaHidden)
            edge_name = OEField("EdgeName_OPLMD", Types.String)

            # The Thermodynamics leg type is used for Bound and
            # UnBound State run identification
            thd_leg_type = OEField("Thd_Leg_OPLMD", Types.String, meta=_metaHidden)

            class NESC:
                """State, Gromacs file and work fields of the RBFEC runs."""

                state_A = OEField("StateA_OPLMD", Types.Record)
                state_B = OEField("StateB_OPLMD", Types.Record)

                gmx_top = OEField("GMX_Top_OPLMD", Types.String, meta=_metaHidden)
                gmx_gro = OEField("GMX_Gro_OPLMD", Types.String, meta=_metaHidden)
                work = OEField("GMX_Work_OPLMD", Types.Float,
                               meta=OEFieldMeta().set_option(
                                   Meta.Units.Energy.kJ_per_mol))
                frame_count = OEField("frame_count", Types.Int, meta=_metaHidden)

                # The Work record is used to collect the data related to the
                # Work Forward and Reverse for the Bound and Unbound States
                work_rec = OEField("Work_Record_OPLMD", Types.Record)

                # The Relative Binding Affinity record collects data for the
                # different analysis methods used to compute it
                DDG_rec = OEField("DDG_Record_OPLMD", Types.Record)
def __init__(self, mdstate, parmed_structure, opt):
    """
    Build the OpenMM system, integrator and simulation for an MD stage.

    Parameters
    ----------
    mdstate:
        State object providing ``get_positions()``, ``get_velocities()``,
        ``get_box_vectors()`` and ``set_positions()``.
    parmed_structure:
        Parmed structure used to create the OpenMM system. Its coordinates
        are updated in place when centering is requested.
    opt: python dictionary
        A dictionary containing all the MD setting info. It is updated in
        place: 'platform', 'cuda_opencl_precision', 'omm_log_fn',
        'omm_trj_fn' and 'timestep' are always set, 'steps' is set for
        nvt/npt runs.

    Raises
    ------
    ValueError
        If an NPT run is requested without box vectors, or if no OpenMM
        platform / precision can be selected.
    """
    super().__init__(mdstate, parmed_structure, opt)

    # The platform and the precision are currently hard-wired: whatever the
    # caller put in opt for these two keys is overridden here.
    opt['platform'] = 'Auto'
    opt['cuda_opencl_precision'] = 'mixed'

    topology = parmed_structure.topology
    positions = mdstate.get_positions()
    velocities = mdstate.get_velocities()
    box = mdstate.get_box_vectors()

    opt['omm_log_fn'] = os.path.join(opt['out_directory'], 'trajectory.log')
    opt['omm_trj_fn'] = os.path.join(opt['out_directory'], 'trajectory.h5')

    # Time step in ps
    if opt['hmr']:
        self.stepLen = 0.004 * unit.picoseconds
        opt['Logger'].info("Hydrogen Mass repartitioning is On")
    else:
        self.stepLen = 0.002 * unit.picoseconds

    opt['timestep'] = self.stepLen

    # Centering the system to the OpenMM Unit Cell
    if opt['center'] and box is not None:
        opt['Logger'].info("[{}] Centering is On".format(opt['CubeTitle']))
        # Numpy array in A
        coords = parmed_structure.coordinates
        # System Center of Geometry
        cog = np.mean(coords, axis=0)
        # System box vectors
        box_v = parmed_structure.box_vectors.in_units_of(unit.angstrom) / unit.angstrom
        box_v = np.array([box_v[0][0], box_v[1][1], box_v[2][2]])
        # Translation vector
        delta = box_v / 2 - cog
        # New Coordinates
        new_coords = coords + delta
        parmed_structure.coordinates = new_coords
        positions = parmed_structure.positions
        mdstate.set_positions(positions)

    # Constraint type. The name is resolved with getattr instead of eval:
    # no arbitrary code is evaluated, and a 'None' entry (which is not a
    # valid attribute expression) maps to "no constraints".
    constraints = md_keys_converter[MDEngines.OpenMM]['constraints'][opt['constraints']]
    if constraints is None or constraints == 'None':
        omm_constraints = None
    else:
        omm_constraints = getattr(app, constraints)

    hydrogen_mass = 4.0 * unit.amu if opt['hmr'] else None

    # OpenMM system
    if box is not None:
        box_v = parmed_structure.box_vectors.value_in_unit(unit.angstrom)
        box_v = np.array([box_v[0][0], box_v[1][1], box_v[2][2]])

        min_box = np.min(box_v)

        # The cutoff is capped to 85% of half the shortest box edge
        threshold = (min_box / 2.0) * 0.85

        if opt['nonbondedCutoff'] < threshold:
            cutoff_distance = opt['nonbondedCutoff'] * unit.angstroms
        else:
            opt['Logger'].warn("[{}] Cutoff Distance too large for the box size. Set the cutoff distance "
                               "to {} A".format(opt['CubeTitle'], threshold))

            cutoff_distance = threshold * unit.angstroms

        self.system = parmed_structure.createSystem(nonbondedMethod=app.PME,
                                                    nonbondedCutoff=cutoff_distance,
                                                    constraints=omm_constraints,
                                                    removeCMMotion=False,
                                                    hydrogenMass=hydrogen_mass)
    else:  # Vacuum
        self.system = parmed_structure.createSystem(nonbondedMethod=app.NoCutoff,
                                                    constraints=omm_constraints,
                                                    removeCMMotion=False,
                                                    hydrogenMass=hydrogen_mass)

    # Add Implicit Solvent Force
    if opt['implicit_solvent'] != 'None':
        opt['Logger'].info("[{}] Implicit Solvent Selected".format(opt['CubeTitle']))
        # NOTE(review): PME is requested here even for the vacuum (no box)
        # system created above - confirm this is intended.
        implicit_force = parmed_structure.omm_gbsa_force(getattr(app, opt['implicit_solvent']),
                                                         temperature=opt['temperature'] * unit.kelvin,
                                                         nonbondedMethod=app.PME,
                                                         nonbondedCutoff=opt['nonbondedCutoff'] * unit.angstroms)
        self.system.addForce(implicit_force)

    # OpenMM Integrator
    integrator = openmm.LangevinIntegrator(opt['temperature'] * unit.kelvin, 1 / unit.picoseconds, self.stepLen)

    if opt['SimType'] == 'npt':
        if box is None:
            raise ValueError("NPT simulation without box vector")

        # Add Force Barostat to the system
        self.system.addForce(
            openmm.MonteCarloBarostat(opt['pressure'] * unit.atmospheres, opt['temperature'] * unit.kelvin, 25))

    # Apply restraints
    if opt['restraints']:
        opt['Logger'].info("[{}] RESTRAINT mask applied to: {}"
                           "\tRestraint weight: {}".format(opt['CubeTitle'],
                                                           opt['restraints'],
                                                           opt['restraintWt'] *
                                                           unit.kilocalories_per_mole / unit.angstroms ** 2))
        # Select atom to restraint
        res_atom_set = oeommutils.select_oemol_atom_idx_by_language(opt['molecule'], mask=opt['restraints'])
        opt['Logger'].info("[{}] Number of restraint atoms: {}".format(opt['CubeTitle'],
                                                                       len(res_atom_set)))

        # define the custom force to restrain atoms to their starting positions
        force_restr = openmm.CustomExternalForce('k_restr*periodicdistance(x, y, z, x0, y0, z0)^2')
        # Add the restraint weight as a global parameter in kcal/mol/A^2
        force_restr.addGlobalParameter("k_restr",
                                       opt['restraintWt'] * unit.kilocalories_per_mole / unit.angstroms ** 2)
        # Define the target xyz coords for the restraint as per-atom (per-particle) parameters
        force_restr.addPerParticleParameter("x0")
        force_restr.addPerParticleParameter("y0")
        force_restr.addPerParticleParameter("z0")

        # Reference coordinates (nanometers) used as restraint targets.
        # They are computed whenever restraining to the reference is
        # requested, so the loop below never reads an undefined name when
        # the system has no box; the centering correction is only applied
        # when box vectors are available.
        use_reference = bool(opt['restraint_to_reference'])
        reference_coords = None

        if use_reference:
            if box is not None:
                opt['Logger'].info("[{}] Restraint to the Reference State Enabled".format(opt['CubeTitle']))

            reference_positions = opt['reference_state'].get_positions()
            reference_coords = np.array(reference_positions.value_in_unit(unit.nanometers))

            if box is not None:
                # System Center of Geometry
                cog = np.mean(reference_coords, axis=0)
                # System box vectors
                box_v = opt['reference_state'].get_box_vectors().value_in_unit(unit.nanometers)
                box_v = np.array([box_v[0][0], box_v[1][1], box_v[2][2]])
                # Translation vector
                delta = box_v / 2 - cog
                # New Coordinates
                reference_coords = reference_coords + delta

        for idx in range(len(positions)):
            if idx not in res_atom_set:
                continue
            if use_reference:
                xyz = reference_coords[idx]  # nanometers unit
            else:
                xyz = positions[idx].in_units_of(unit.nanometers) / unit.nanometers
            force_restr.addParticle(idx, xyz)

        self.system.addForce(force_restr)

    # Freeze atoms
    if opt['freeze']:
        opt['Logger'].info("[{}] FREEZE mask applied to: {}".format(opt['CubeTitle'],
                                                                    opt['freeze']))

        freeze_atom_set = oeommutils.select_oemol_atom_idx_by_language(opt['molecule'], mask=opt['freeze'])
        opt['Logger'].info("[{}] Number of frozen atoms: {}".format(opt['CubeTitle'],
                                                                    len(freeze_atom_set)))
        # Set atom masses to zero
        for idx in range(len(positions)):
            if idx in freeze_atom_set:
                self.system.setParticleMass(idx, 0.0)

    # Platform Selection
    if opt['platform'] == 'Auto':
        # Select the first available platform, from the fastest to the slowest
        platform = None
        for plt_name in ['CUDA', 'OpenCL', 'CPU', 'Reference']:
            try:
                platform = openmm.Platform.getPlatformByName(plt_name)
                break
            except Exception:
                # This platform is not available: try the next one
                continue

        if platform is None:
            raise ValueError('It was not possible to select any OpenMM Platform')

        if platform.getName() in ['CUDA', 'OpenCL']:
            simulation = None
            for precision in ['mixed', 'single', 'double']:
                # Set platform precision for CUDA or OpenCL
                properties = {'Precision': precision}

                if 'gpu_id' in opt and 'OE_VISIBLE_DEVICES' in os.environ and not in_orion():
                    properties['DeviceIndex'] = opt['gpu_id']

                try:
                    simulation = app.Simulation(topology, self.system, integrator,
                                                platform=platform,
                                                platformProperties=properties)
                    break
                except Exception:
                    # This precision is not supported: try the next one
                    continue

            if simulation is None:
                raise ValueError('It was not possible to select any Precision '
                                 'for the selected Platform: {}'.format(platform.getName()))
        else:  # CPU or Reference
            simulation = app.Simulation(topology, self.system, integrator, platform=platform)
    else:  # Not Auto Platform selection
        try:
            platform = openmm.Platform.getPlatformByName(opt['platform'])
        except Exception as e:
            raise ValueError('The selected platform is not supported: {}'.format(str(e))) from e

        if opt['platform'] in ['CUDA', 'OpenCL']:
            try:
                # Set platform CUDA or OpenCL precision
                properties = {'Precision': opt['cuda_opencl_precision']}

                simulation = app.Simulation(topology, self.system, integrator,
                                            platform=platform,
                                            platformProperties=properties)
            except Exception as e:
                raise ValueError('It was not possible to set the {} precision for the {} platform'
                                 .format(opt['cuda_opencl_precision'], opt['platform'])) from e
        else:  # CPU or Reference Platform
            simulation = app.Simulation(topology, self.system, integrator, platform=platform)

    # Set starting positions and velocities
    simulation.context.setPositions(positions)

    # Set Box dimensions
    if box is not None:
        simulation.context.setPeriodicBoxVectors(box[0], box[1], box[2])

    # If the velocities are not present in the Parmed structure
    # new velocity vectors are generated otherwise the system is
    # restarted from the previous State
    if opt['SimType'] in ['nvt', 'npt']:

        if velocities is not None:
            opt['Logger'].info('[{}] RESTARTING simulation from a previous State'.format(opt['CubeTitle']))
            simulation.context.setVelocities(velocities)
        else:
            # Set the velocities drawing from the Boltzmann distribution at the selected temperature
            opt['Logger'].info('[{}] GENERATING a new starting State'.format(opt['CubeTitle']))
            simulation.context.setVelocitiesToTemperature(opt['temperature'] * unit.kelvin)

        # Convert simulation time in steps
        opt['steps'] = int(round(opt['time'] / (self.stepLen.in_units_of(unit.nanoseconds) / unit.nanoseconds)))

        # Set Reporters
        for rep in getReporters(**opt):
            simulation.reporters.append(rep)

    # OpenMM platform information
    mmver = openmm.version.version
    mmplat = simulation.context.getPlatform()

    str_logger = '\n' + '-' * 32 + ' SIMULATION ' + '-' * 32
    str_logger += '\n' + '{:<25} = {:<10}'.format('time step', str(opt['timestep']))

    # Host information
    for k, v in uname()._asdict().items():
        str_logger += "\n{:<25} = {:<10}".format(k, v)
        opt['Logger'].info("[{}] {} : {}".format(opt['CubeTitle'], k, v))

    # Platform properties
    for prop in mmplat.getPropertyNames():
        val = mmplat.getPropertyValue(simulation.context, prop)
        str_logger += "\n{:<25} = {:<10}".format(prop, val)
        opt['Logger'].info("[{}] {} : {}".format(opt['CubeTitle'], prop, val))

    info = "{:<25} = {:<10}".format("OpenMM Version", mmver)
    opt['Logger'].info("[{}] OpenMM Version : {}".format(opt['CubeTitle'], mmver))
    str_logger += '\n' + info

    info = "{:<25} = {:<10}".format("Platform in use", mmplat.getName())
    opt['Logger'].info("[{}] Platform in use : {}".format(opt['CubeTitle'], mmplat.getName()))
    str_logger += '\n' + info

    self.mdstate = mdstate
    self.parmed_structure = parmed_structure
    self.opt = opt
    self.str_logger = str_logger
    self.omm_simulation = simulation
def _file_processing(**opt):
    """
    This supporting function collects the produced trajectory and its
    supporting files, compresses them in a .tar file (if required) and
    uploads the result to Orion when running there. If no .tar file is
    selected, the generated files are uploaded to Orion one by one.

    Parameters
    ----------
    opt: python dictionary
        A dictionary containing all the MD setting info. The keys read here
        are 'trajectory_filetype', 'outfname', 'tar' and, when a tar file is
        requested, 'Logger' and 'molecule'.
    """
    # Set the trajectory file name
    extensions = {'NetCDF': '.nc', 'DCD': '.dcd', 'HDF5': '.hdf5'}

    if opt['trajectory_filetype'] not in extensions:
        oechem.OEThrow.Fatal("The selected trajectory filetype is not supported: {}"
                             .format(opt['trajectory_filetype']))

    trj_fn = opt['outfname'] + extensions[opt['trajectory_filetype']]

    # Set .pdb file names
    pdb_fn = opt['outfname'] + '.pdb'
    pdb_order_fn = opt['outfname'] + '_ordering_test' + '.pdb'
    log_fn = opt['outfname'] + '.log'

    # List all the file names
    fnames = [trj_fn, pdb_fn, pdb_order_fn, log_fn]

    # Keep only the file names that are actually produced files
    ex_files = [fn for fn in fnames if os.path.isfile(fn)]

    # Tar the outputted files if required
    if opt['tar']:
        tarname = opt['outfname'] + '.tar'
        opt['Logger'].info('Creating tar file: {}'.format(tarname))

        # The context manager closes the archive even if adding a file fails
        with tarfile.open(tarname, "w") as tar:
            for name in ex_files:
                opt['Logger'].info('Adding {} to {}'.format(name, tarname))
                tar.add(name)

        opt['molecule'].SetData(oechem.OEGetTag("Tar_fname"), tarname)

        if in_orion():
            upload_file(tarname, tarname, tags=['TRJ_INFO'])

        # Clean up files that have been added to tar. The removal is
        # best-effort: a file that cannot be removed is left in place, but
        # only file-system errors are ignored.
        for tmp in ex_files:
            try:
                os.remove(tmp)
            except OSError:
                pass
    else:  # If no .tar file is required the files are uploaded to Orion
        if in_orion():
            for fn in ex_files:
                upload_file(fn, fn, tags=['TRJ_INFO'])