class InputGromacs(SourceCube):
    """Source cube yielding records from a restart dataset or a .tpr file.

    When ``data_in`` supplies at least one dataset, every record of every
    dataset is emitted unchanged. Otherwise the first file supplied through
    ``tpr`` is read as raw bytes and emitted on a new record together with
    the system prefix name. If neither input is given a ``ValueError`` is
    raised.
    """

    uuid = "734c4f6f-8ccf-4d78-b37e-8c1a5b143454"
    # version = "0.1.4"
    title = "InputTprGromacs"
    classification = [["Gromacs", "Reader"]]
    tags = ["OpenEye", "Gromacs", "MD"]
    description = "This Cube read in a Gromacs .tpr file or a recovery dataset"

    success = RecordOutputPort('success')
    failure = RecordOutputPort('failure')

    prefix_name = parameters.StringParameter(
        "prefix_name",
        description="The system prefix name",
        required=True,
        default="PROT")

    tpr = FileInputParameter(
        "tpr",
        title="Gromacs Tpr",
        description="Gromacs 2019 Tpr file input",
        required=False,
        default=None)

    data_in = DatasetInputParameter(
        "data_in",
        title="Input Dataset",
        description="The Dataset used for restarting. OPTIONAL",
        required=False,
        default=None)

    def __iter__(self):
        """Yield the records produced by this source cube.

        Restart datasets take precedence over .tpr files. Only the first
        .tpr file is turned into a record.

        Raises:
            ValueError: if neither a dataset nor a .tpr file was provided.
        """
        restart_datasets = list(self.args.data_in)
        tpr_files = list(self.args.tpr)

        # Restart path: replay every record stored in the given datasets
        if restart_datasets:
            for restart_dataset in restart_datasets:
                yield from restart_dataset.records()
            return

        if not tpr_files:
            raise ValueError(
                "A Gromacs input .tpr file or a restart dataset is required")

        # An empty prefix falls back to the parameter default
        if self.args.prefix_name == "":
            self.args.prefix_name = "PROT"

        # Only the first provided file is consumed
        first_tpr = tpr_files[0]

        with TemporaryPath(suffix=".tpr") as local_path:
            first_tpr.copy_to(local_path)

            with open(local_path, "rb") as handle:
                raw_tpr = handle.read()

            tpr_record = OERecord()
            tpr_record.set_value(Fields.tpr_field, raw_tpr)
            tpr_record.set_value(Fields.prefix_name_field,
                                 self.args.prefix_name)

            yield tpr_record
class ForceFieldCube(RecordPortsMixin, ComputeCube):
    """Compute cube that parametrizes the MD components found on a record.

    The cube reads the ``Fields.md_components`` value, builds the flask,
    asks the components container for a Parmed structure, validates the
    structure against the flask (atom count, total charge, atomic numbers),
    copies the partial charges back to the flask and to each component and
    finally stores the flask, the Parmed structure and a new force field
    stage on the MD record before emitting it on the success port.
    """

    title = "Force Field Application"
    # version = "0.1.4"
    classification = [["Force Field"]]
    tags = ['ForceField']
    description = """
    This Cube parametrizes a flask with the selected force fields.
    The cube tries to split a flask into components: protein, ligand,
    water and excipients. The user can select the parametrization to be
    applied to each component. The protein forcefield is limited to
    standard amino acids and limited support to non-standard. Sugars
    are not currently supported but this will be improved in coming
    releases. The cube requires a record as input and produces a new
    record where the flask has been parametrized. The parametrization
    is carried out by using a Parmed object
    (https://github.com/ParmEd/ParmEd)
    which will be present on the emitted record. The supported protein
    force fields are amber99sb-ildn and the new amberfb-15. Small organic
    molecules like ligands and excipients can be parametrized by using
    GAFF, GAFF2 and SMIRNOFF forcefields. The flask splitting is based on
    the ligand residue name. The default one is “LIG” and can be changed
    by using the provided cube parameter. Water is currently parametrized
    by using TIP3P force field water model only.
    """

    uuid = "aac0d06f-afd3-4801-ba50-2d703a07ab35"

    # Override defaults for some parameters
    parameter_overrides = {
        "memory_mb": {"default": 14000},
        "spot_policy": {"default": "Allowed"},
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    protein_forcefield = parameters.StringParameter(
        'protein_forcefield',
        default=sorted(ff_library.proteinff)[0],
        choices=sorted(ff_library.proteinff),
        help_text='Force field parameters to be applied to the protein')

    ligand_forcefield = parameters.StringParameter(
        'ligand_forcefield',
        default=sorted(ff_library.ligandff)[0],
        choices=sorted(ff_library.ligandff),
        help_text='Force field to be applied to the ligand')

    suffix = parameters.StringParameter(
        'suffix',
        default='prep',
        help_text='Filename suffix for output simulation files')

    def begin(self):
        """Expose the cube arguments as a dictionary and attach the logger."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, record, port):
        """Parametrize the flask stored on ``record`` and emit the result.

        Any exception raised while processing is logged and the unmodified
        input ``record`` is emitted on the failure port.

        Args:
            record: input record; must carry ``Fields.md_components``.
            port: name of the input port the record arrived on (unused).
        """
        try:
            opt = self.opt
            opt['CubeTitle'] = self.title

            if not record.has_value(Fields.md_components):
                raise ValueError("MD Components Field is missing")

            md_components = record.get_value(Fields.md_components)
            flask, map_comp = md_components.create_flask

            opt['Logger'].info(md_components.get_info)

            if not record.has_value(Fields.title):
                self.log.warn("Missing record Title field")
                flask_title = flask.GetTitle()[0:12]
            else:
                flask_title = record.get_value(Fields.title)

            # Parametrize the whole flask
            flask_pmd_structure = md_components.parametrize_components(
                protein_ff=opt['protein_forcefield'],
                ligand_ff=opt['ligand_forcefield'])

            # Set Parmed structure box_vectors
            is_periodic = True
            if md_components.get_box_vectors is not None:
                flask_pmd_structure.box_vectors = md_components.get_box_vectors
            else:
                is_periodic = False
                self.log.warn("Flask {} has been parametrize without periodic box vectors ".format(flask_title))

            if flask.NumAtoms() != flask_pmd_structure.topology.getNumAtoms():
                raise ValueError("The flask {} and the generated Parmed structure "
                                 "have mismatch atom numbers: {} vs {}".
                                 format(flask_title,
                                        flask.NumAtoms(),
                                        flask_pmd_structure.topology.getNumAtoms()))

            # Check Formal vs Partial charges
            flask_formal_charge = sum(at.GetFormalCharge() for at in flask.GetAtoms())
            flask_partial_charge = sum((at.charge for at in flask_pmd_structure.atoms), 0.0)

            if abs(flask_formal_charge - flask_partial_charge) > 0.01:
                raise ValueError("Flask Formal charge and flask Partial charge mismatch: {} vs {}".format(
                    flask_formal_charge, flask_partial_charge))

            # Copying the charges between the parmed structure and the oemol
            for parm_at, oe_at in zip(flask_pmd_structure.atoms, flask.GetAtoms()):

                if parm_at.atomic_number != oe_at.GetAtomicNum():
                    raise ValueError(
                        "Atomic number mismatch between the Parmed and the OpenEye topologies: {} - {}".
                        format(parm_at.atomic_number, oe_at.GetAtomicNum()))

                oe_at.SetPartialCharge(parm_at.charge)

            # Set the component charges
            for comp_name, comp in md_components.get_components.items():
                for at_comp in comp.GetAtoms():

                    pred = oechem.OEHasAtomIdx(map_comp[comp_name][at_comp.GetIdx()])
                    at_flask = flask.GetAtom(pred)

                    # A component atom mapped onto a different element means
                    # the component/flask map is inconsistent: fail instead
                    # of silently copying a charge from the wrong atom
                    if at_flask.GetAtomicNum() != at_comp.GetAtomicNum():
                        raise ValueError(
                            "Atomic number mismatch between the component {} atom {} and the flask atom {}".
                            format(comp_name, at_comp, at_flask))

                    at_comp.SetPartialCharge(at_flask.GetPartialCharge())

                md_components.set_component_by_name(comp_name, comp)

            # Update the components after setting the charges
            record.set_value(Fields.md_components, md_components)

            # Check if it is possible to create the OpenMM System. The call
            # is made only so that a failure surfaces here; the returned
            # system is not used
            if is_periodic:
                flask_pmd_structure.createSystem(nonbondedMethod=app.CutoffPeriodic,
                                                 nonbondedCutoff=10.0 * unit.angstroms,
                                                 constraints=None,
                                                 removeCMMotion=False,
                                                 rigidWater=False)
            else:
                flask_pmd_structure.createSystem(nonbondedMethod=app.NoCutoff,
                                                 constraints=None,
                                                 removeCMMotion=False,
                                                 rigidWater=False)

            mdrecord = MDDataRecord(record)
            sys_id = mdrecord.get_flask_id

            mdrecord.set_flask(flask)

            mdrecord.set_parmed(flask_pmd_structure, shard_name="Parmed_" + flask_title + '_' + str(sys_id))

            data_fn = os.path.basename(mdrecord.cwd) + '_' + flask_title+'_' + str(sys_id) + '-' + opt['suffix']+'.tar.gz'

            if not mdrecord.add_new_stage(MDStageNames.ForceField,
                                          MDStageTypes.SETUP,
                                          flask,
                                          MDState(flask_pmd_structure),
                                          data_fn):
                raise ValueError("Problems adding the new Parametrization Stage")

            self.success.emit(mdrecord.get_record)

            del mdrecord

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            self.opt['Logger'].info('Exception {} {}'.format(str(e), self.title))
            self.log.error(traceback.format_exc())
            self.failure.emit(record)

        return
class LigandSetting(RecordPortsMixin, ComputeCube):
    """Compute cube that prepares a ligand record for the MD workflow.

    For every incoming record the primary molecule is checked against a
    molecular weight limit, stripped of interaction hints, style and PDB
    data, sanitized, renamed at residue level with ``lig_res_name`` and
    tagged with a progressive ligand id. The original record is kept as a
    sub-record. At the end of the run the accumulated number of conformers
    is compared against ``max_md_runs``.
    """

    title = "Ligand Setting"
    # version = "0.1.4"
    classification = [["System Preparation"]]
    tags = ['Ligand']
    description = """
    This Cube is used to set the ligand residue name as the cube parameter
    “lig_res_name” (default: “LIG”). This is necessary to facilitate the
    identification of system components during a system splitting.
    """

    uuid = "fce16dd4-ce3a-4374-92f0-4ed24259d2f6"

    # Override defaults for some parameters
    parameter_overrides = {
        "memory_mb": {"default": 2000},
        "spot_policy": {"default": "Prohibited"},
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    # Ligand Residue Name
    lig_res_name = parameters.StringParameter(
        'lig_res_name',
        default='LIG',
        help_text='The new ligand residue name')

    max_md_runs = parameters.IntegerParameter(
        'max_md_runs',
        default=500,
        help_text='The maximum allowed number of md runs')

    def begin(self):
        """Initialize the option dictionary and the run counters."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.ligand_count = 0
        self.max_runs = 0

    def process(self, initialRecord, port):
        """Prepare the ligand carried by ``initialRecord`` and emit it.

        On any exception the error is logged and ``initialRecord`` is
        emitted on the failure port.

        Args:
            initialRecord: input record; must carry
                ``Fields.primary_molecule``.
            port: name of the input port the record arrived on (unused).
        """
        try:
            if not initialRecord.has_value(Fields.primary_molecule):
                raise ValueError("Missing Primary Molecule field")

            ligand = initialRecord.get_value(Fields.primary_molecule)

            # place the entire initial record as a sub-record, to be restored when conformer runs are gathered
            out_record = OERecord()
            out_record.set_value(Fields.ligInit_rec, initialRecord)

            # Units are in Dalton
            mol_weight = oechem.OECalculateMolecularWeight(ligand)
            if mol_weight > 1500.0:
                raise ValueError("[{}] The molecule {} seems to have a large molecular weight for a "
                                 "ligand: {:.2f} Da)".format(self.title,
                                                             ligand.GetTitle(),
                                                             mol_weight))

            # Removing Interaction Hint Container, Style and PDB Data
            oechem.OEDeleteInteractionsHintSerializationData(ligand)
            oechem.OEDeleteInteractionsHintSerializationIds(ligand)
            oechem.OEClearStyle(ligand)
            oechem.OEClearPDBData(ligand)

            # Ligand sanitation
            ligand = oeommutils.sanitizeOEMolecule(ligand)

            # An untitled ligand gets a generic name
            ligand_name = ligand.GetTitle() or 'LIG'
            out_record.set_value(Fields.ligand_name, ligand_name)

            # Stamp the requested residue name on every ligand atom
            for atom in ligand.GetAtoms():
                res = oechem.OEAtomGetResidue(atom)
                res.SetName(self.args.lig_res_name)
                oechem.OEAtomSetResidue(atom, res)

            out_record.set_value(Fields.primary_molecule, ligand)
            out_record.set_value(Fields.ligid, self.ligand_count)

            self.success.emit(out_record)

            self.ligand_count += 1
            self.max_runs += ligand.NumConfs()

        except Exception as err:
            print("Failed to complete", str(err), flush=True)
            self.opt['Logger'].info('Exception {} {}'.format(str(err), self.title))
            self.log.error(traceback.format_exc())
            self.failure.emit(initialRecord)

    def end(self):
        """Fail the cube if more runs were detected than ``max_md_runs``.

        Raises:
            ValueError: when the accumulated conformer count exceeds the
                ``max_md_runs`` option.
        """
        allowed_runs = self.opt['max_md_runs']

        if self.max_runs <= allowed_runs:
            return

        raise ValueError('IMPORTANT: The detected total number of md runs is greater than the '
                         'max allowed md run setting: {} vs {}\n. If it is required to run '
                         'all the detected flasks you have to increase the max_md_runs '
                         'option. BE AWARE that the job total cost could be expensive'
                         ''.format(self.max_runs, allowed_runs))
class MDNptCube(RecordPortsMixin, ComputeCube):
    """Compute cube that runs one NPT stage on an MD record.

    The cube reads the last stage topology and state from the MD record,
    calls ``md_simulation`` with the cube options, writes the new
    coordinates on the flask and stores the result as a new (or
    overwritten) NPT stage before emitting the record on the success port.
    """

    title = 'NPT Cube'
    # version = "0.1.4"
    classification = [['MD Simulations']]
    tags = ['Gromacs', 'OpenMM', 'NPT']

    description = """
    This Cube performs MD simulation in the NPT ensemble on the provided system.
    The system must have been parametrized by the Force Field cube and the system
    Parmed object must be present on the input record. In addition, a system
    identification number must be present on the input record as well. This can be
    accomplished by using the “ID Setting Cube”. The NPT MD simulation is performed
    by the selected MD engine, currently OpenMM and Gromacs only. Restraints
    and constraints can be used as well. Currently implicit solvent models can
    be used in OpenMM only. The cube requires a record as input and produces
    a new record with the time evolved system. The total sampling time can
    be set by using the “time” cube parameter while the trajectory snapshots
    can be set by using the “trajectory_interval” cube parameter.
    """

    uuid = "602d397b-d8a5-4388-a94a-ac3a54ff3bad"

    # Override defaults for some parameters
    parameter_overrides = {
        "gpu_count": {"default": 1},
        "instance_type": {"default": "g3.4xlarge"},  # Gpu Family selection
        "memory_mb": {"default": 14000},
        "spot_policy": {"default": "Allowed"},
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    temperature = parameters.DecimalParameter(
        'temperature',
        default=300.0,
        help_text="Temperature (Kelvin)")

    pressure = parameters.DecimalParameter(
        'pressure',
        default=1.0,
        help_text="Pressure (atm)")

    time = parameters.DecimalParameter(
        'time',
        default=0.01,
        help_text="NPT simulation time in nanoseconds")

    # The help text previously started with a stray double quote
    restraints = parameters.StringParameter(
        'restraints',
        default='',
        help_text="""Mask selection to apply harmonic restraints.
        Possible keywords are: ligand, protein, water, ions,
        ca_protein, cofactors. The selection can be refined
        by using logical tokens: not, noh, and, or, diff, around""")

    restraintWt = parameters.DecimalParameter(
        'restraintWt',
        default=2.0,
        help_text="Restraint weight for xyz atom restraints in kcal/(mol A^2)")

    restraint_to_reference = parameters.BooleanParameter(
        'restraint_to_reference',
        default=True,
        help_text='If True the starting reference system coordinates will be used '
                  'to restraint the system')

    freeze = parameters.StringParameter(
        'freeze',
        default='',
        help_text="""Mask selection to freeze atoms along the MD simulation.
        Possible keywords are: ligand, protein, water, ions, ca_protein,
        cofactors. The selection can be refined by using logical tokens:
        not, noh, and, or, diff, around. Not currently implemented in Gromacs""")

    nonbondedCutoff = parameters.DecimalParameter(
        'nonbondedCutoff',
        default=10,
        help_text="""The non-bonded cutoff in angstroms.
        This is ignored if non-bonded method is NoCutoff""")

    # "H-Bonds" in the help text matches the spelling of the choice
    constraints = parameters.StringParameter(
        'constraints',
        default='H-Bonds',
        choices=['None', 'H-Bonds', 'H-Angles', 'All-Bonds'],
        help_text="""None, H-Bonds, H-Angles, or All-Bonds
        Which type of constraints to add to the system.
        None means no bonds are constrained.
        H-Bonds means bonds with hydrogen are constrained, etc.""")

    implicit_solvent = parameters.StringParameter(
        'implicit_solvent',
        default='None',
        choices=['None', 'HCT', 'OBC1', 'OBC2', 'GBn', 'GBn2'],
        help_text="Implicit Solvent Model. Not Currently implemented in Gromacs")

    trajectory_interval = parameters.DecimalParameter(
        'trajectory_interval',
        default=0.0,
        help_text="""Time interval for trajectory snapshots in ns.
        If 0 the trajectory file will not be generated""")

    reporter_interval = parameters.DecimalParameter(
        'reporter_interval',
        default=0.0,
        help_text="""Time interval for reporting data in ns.
        If 0 the reporter file will not be generated""")

    trajectory_frames = parameters.IntegerParameter(
        'trajectory_frames',
        default=0,
        help_text="""The total number of trajectory frames. If it is
        set to zero and the trajectory interval parameter is set
        to zero no trajectory is generated. If it is different from zero
        and the trajectory interval parameter is set to zero the produced
        trajectory will have the selected number of frames. If different
        from zero and the trajectory interval parameter is different from
        zero the total number of generated frames will be calculated by just
        using the trajectory interval and the md time step (2fs and 4fs hmr on)""")

    suffix = parameters.StringParameter(
        'suffix',
        default='npt',
        help_text='Filename suffix for output simulation files')

    center = parameters.BooleanParameter(
        'center',
        default=False,
        help_text='Center the system to the OpenMM unit cell')

    verbose = parameters.BooleanParameter(
        'verbose',
        default=True,
        help_text='Increase log file verbosity')

    hmr = parameters.BooleanParameter(
        'hmr',
        default=True,
        help_text='On enables Hydrogen Mass Repartitioning. Not currently implemented in Gromacs')

    save_md_stage = parameters.BooleanParameter(
        'save_md_stage',
        default=False,
        help_text="""Save the MD simulation stage. If True the MD,
        simulation data will be appended to the md simulation stages
        otherwise the last MD stage will be overwritten""")

    md_engine = parameters.StringParameter(
        'md_engine',
        default='OpenMM',
        choices=['OpenMM', 'Gromacs'],
        help_text='Select the MD available engine')

    def begin(self):
        """Expose the cube arguments as a dictionary and tag the run as NPT."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.opt['SimType'] = 'npt'

        return

    def process(self, record, port):
        """Run the NPT simulation on ``record`` and emit the updated record.

        ``temperature`` and ``pressure`` fields found on the record override
        the corresponding cube parameters. Any exception is logged and the
        input ``record`` is emitted on the failure port.

        Args:
            record: input MD record.
            port: name of the input port the record arrived on (unused).
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)
            opt['CubeTitle'] = self.title

            # Logger string
            str_logger = '-' * 32 + ' NPT CUBE PARAMETERS ' + '-' * 32
            str_logger += "\n{:<25} = {:<10}".format("Cube Title", opt['CubeTitle'])

            # NOTE(review): only parameters whose default is neither None
            # nor boolean reach the log string; confirm this is intended
            for k, v in sorted(self.parameters().items()):
                tmp_default = copy.deepcopy(v)

                if v.default is None:
                    tmp_default.default = 'None'
                elif isinstance(v, parameters.BooleanParameter):
                    if v.default:
                        tmp_default.default = 'True'
                    else:
                        tmp_default.default = 'False'
                else:
                    tmp_description = textwrap.fill(" ".join(v.description.split()),
                                                    subsequent_indent=' ' * 39, width=80)
                    str_logger += "\n{:<25} = {:<10} {}".format(k,
                                                                getattr(self.args, tmp_default.name),
                                                                tmp_description)

            str_logger += "\n{:<25} = {:<10}".format("Simulation Type", opt['SimType'])

            # Create the MD record to use the MD Record API
            mdrecord = MDDataRecord(record)

            system_title = mdrecord.get_title

            opt['system_title'] = system_title
            opt['system_id'] = mdrecord.get_flask_id

            flask = mdrecord.get_stage_topology()
            mdstate = mdrecord.get_stage_state()

            # Update cube simulation parameters
            for field in record.get_fields(include_meta=True):
                field_name = field.get_name()
                if field_name in ['temperature', 'pressure']:
                    rec_value = record.get_value(field)
                    opt[field_name] = rec_value
                    opt['Logger'].info(
                        "{} Updating parameters for molecule: {} {} = {}".
                        format(self.title, system_title, field_name, rec_value))

            if opt['restraint_to_reference']:
                opt['reference_state'] = mdrecord.get_stage_state(
                    stg_name=MDStageNames.ForceField)

            opt['out_directory'] = mdrecord.cwd
            opt['molecule'] = flask
            opt['str_logger'] = str_logger
            opt['Logger'].info('[{}] START NPT SIMULATION: {}'.format(
                opt['CubeTitle'], system_title))

            opt['out_fn'] = os.path.basename(opt['out_directory']) + '_' + \
                opt['system_title'] + '_' + \
                str(opt['system_id']) + '-' + \
                opt['suffix']

            # Trajectory file name if any generated
            opt['trj_fn'] = opt['out_fn'] + '_' + 'traj.tar.gz'

            # Extract the Parmed structure and synchronize it with the last MD stage state
            parmed_structure = mdrecord.get_parmed(sync_stage_name='last')

            # Run the MD simulation
            new_mdstate = md_simulation(mdstate, parmed_structure, opt)

            # Update the system coordinates
            flask.SetCoords(new_mdstate.get_oe_positions())
            mdrecord.set_flask(flask)

            # Trajectory
            if opt['trajectory_interval'] or opt['trajectory_frames']:
                trajectory_fn = opt['trj_fn']
                if opt['md_engine'] == MDEngines.OpenMM:
                    trajectory_engine = MDEngines.OpenMM
                else:
                    trajectory_engine = MDEngines.Gromacs
            else:  # Empty Trajectory
                trajectory_fn = None
                trajectory_engine = None

            data_fn = opt['out_fn'] + '.tar.gz'

            if not mdrecord.add_new_stage(
                    self.title,
                    MDStageTypes.NPT,
                    flask,
                    new_mdstate,
                    data_fn,
                    append=opt['save_md_stage'],
                    log=opt['str_logger'],
                    trajectory_fn=trajectory_fn,
                    trajectory_engine=trajectory_engine,
                    trajectory_orion_ui=opt['system_title'] + '_' +
                    str(opt['system_id']) + '-' + opt['suffix'] + '.tar.gz'):
                raise ValueError("Problems adding in the new NPT Stage")

            self.success.emit(mdrecord.get_record)

            del mdrecord

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            self.opt['Logger'].info('Exception {} {}'.format(
                str(e), self.title))
            self.log.error(traceback.format_exc())
            self.failure.emit(record)

        return
class MDMinimizeCube(RecordPortsMixin, ComputeCube):
    """Compute cube that runs one energy minimization stage on an MD record.

    The cube reads the last stage topology and state from the MD record,
    calls ``md_simulation`` with the cube options, writes the new
    coordinates on the flask and stores the result as a new (or
    overwritten) minimization stage before emitting the record on the
    success port.
    """

    title = 'Minimization Cube'
    # version = "0.1.4"
    classification = [["MD Simulations"]]
    tags = ['OpenMM', 'Gromacs', 'Minimization']

    description = """
    This Cube performs energy minimization on the provided system. The system
    must have been parametrized by the Force Field cube and the system Parmed
    object must be present on the input record. In addition, a system identification
    number must be present on the input record as well. This can be accomplished
    by using the “ID Setting Cube”. The system minimization is performed by
    the selected MD engine, currently OpenMM and Gromacs only. Restraints
    and constraints can be used as well. Currently implicit solvent models can
    be used in OpenMM only. The cube requires a record as input and produces
    a new record with the minimized system.
    """

    uuid = "bdfeaabe-f93b-4a14-9754-d6ca0c18a009"

    # Override defaults for some parameters
    parameter_overrides = {
        "gpu_count": {"default": 1},
        "instance_type": {"default": "g3.4xlarge"},  # Gpu Family selection
        "memory_mb": {"default": 14000},
        "spot_policy": {"default": "Allowed"},
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    steps = parameters.IntegerParameter(
        'steps',
        default=2000,
        help_text="""Number of minimization steps.
        If 0 the minimization will continue
        until convergence""")

    # The help text previously started with a stray double quote
    restraints = parameters.StringParameter(
        'restraints',
        default='',
        help_text="""Mask selection to apply harmonic restraints.
        Possible keywords are: ligand, protein, water, ions,
        ca_protein, cofactors. The selection can be refined
        by using logical tokens: not, noh, and, or, diff, around""")

    restraintWt = parameters.DecimalParameter(
        'restraintWt',
        default=5.0,
        help_text="Restraint weight for xyz atom restraints in kcal/(mol A^2)")

    restraint_to_reference = parameters.BooleanParameter(
        'restraint_to_reference',
        default=True,
        help_text='If True the starting reference system coordinates will be used '
                  'to restraint the system')

    freeze = parameters.StringParameter(
        'freeze',
        default='',
        help_text="""Mask selection to freeze atoms along the MD
        simulation. Possible keywords are: ligand, protein, water,
        ions, ca_protein, cofactors. The selection can be refined by
        using logical tokens: not, noh, and, or, diff, around. NOTE:
        Not currently implemented in Gromacs""")

    temperature = parameters.DecimalParameter(
        'temperature',
        default=300,
        help_text="Temperature (Kelvin)")

    nonbondedCutoff = parameters.DecimalParameter(
        'nonbondedCutoff',
        default=10,
        help_text="""The non-bonded cutoff in angstroms.
        This is ignored if the non-bonded method is NoCutoff""")

    constraints = parameters.StringParameter(
        'constraints',
        default='H-Bonds',
        choices=['None', 'H-Bonds', 'H-Angles', 'All-Bonds'],
        help_text="""None, H-Bonds, H-Angles, or All-Bonds
        Which type of constraints to add to the system.
        None means no bonds are constrained.
        H-Bonds means bonds with hydrogen are constrained, etc.""")

    # A space was missing between the two concatenated literals ("NOTE:Not")
    implicit_solvent = parameters.StringParameter(
        'implicit_solvent',
        default='None',
        choices=['None', 'HCT', 'OBC1', 'OBC2', 'GBn', 'GBn2'],
        help_text="Implicit Solvent Model. NOTE: "
                  "Not currently implemented in Gromacs")

    center = parameters.BooleanParameter(
        'center',
        default=True,
        description='Center the system to the OpenMM and Gromacs unit cell')

    verbose = parameters.BooleanParameter(
        'verbose',
        default=True,
        description='Increase log file verbosity')

    suffix = parameters.StringParameter(
        'suffix',
        default='min',
        help_text='Filename suffix for output simulation files')

    # A space was missing between the two concatenated literals ("NOTE:Not")
    hmr = parameters.BooleanParameter(
        'hmr',
        default=False,
        description='On enables Hydrogen Mass Repartitioning. NOTE: '
                    'Not currently implemented in Gromacs')

    save_md_stage = parameters.BooleanParameter(
        'save_md_stage',
        default=True,
        help_text="""Save the MD simulation stage. If True the MD,
        simulation data will be appended to the md simulation stages
        otherwise the last MD stage will be overwritten""")

    md_engine = parameters.StringParameter(
        'md_engine',
        default='OpenMM',
        choices=['OpenMM', 'Gromacs'],
        help_text='Select the MD available engine')

    def begin(self):
        """Expose the cube arguments as a dictionary and tag the run as min."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.opt['SimType'] = 'min'

        return

    def process(self, record, port):
        """Minimize the system stored on ``record`` and emit the new record.

        Any exception is logged and the input ``record`` is emitted on the
        failure port.

        Args:
            record: input MD record.
            port: name of the input port the record arrived on (unused).
        """
        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)
            opt['CubeTitle'] = self.title

            # Logger string
            str_logger = '-' * 32 + ' MIN CUBE PARAMETERS ' + '-' * 32
            str_logger += "\n{:<25} = {:<10}".format("Cube Title", opt['CubeTitle'])

            # NOTE(review): only parameters whose default is neither None
            # nor boolean reach the log string; confirm this is intended
            for k, v in sorted(self.parameters().items()):
                tmp_default = copy.deepcopy(v)

                if v.default is None:
                    tmp_default.default = 'None'
                elif isinstance(v, parameters.BooleanParameter):
                    if v.default:
                        tmp_default.default = 'True'
                    else:
                        tmp_default.default = 'False'
                else:
                    tmp_description = textwrap.fill(" ".join(v.description.split()),
                                                    subsequent_indent=' ' * 39, width=80)
                    str_logger += "\n{:<25} = {:<10} {}".format(k,
                                                                getattr(self.args, tmp_default.name),
                                                                tmp_description)

            str_logger += "\n{:<25} = {:<10}".format("Simulation Type", opt['SimType'])

            # Create the MD record to use the MD Record API
            mdrecord = MDDataRecord(record)

            system_title = mdrecord.get_title

            opt['system_title'] = system_title
            opt['system_id'] = mdrecord.get_flask_id

            flask = mdrecord.get_stage_topology()
            mdstate = mdrecord.get_stage_state()

            if opt['restraint_to_reference']:
                opt['reference_state'] = mdrecord.get_stage_state(
                    stg_name=MDStageNames.ForceField)

            opt['out_directory'] = mdrecord.cwd
            opt['molecule'] = flask
            opt['str_logger'] = str_logger

            opt['Logger'].info('[{}] MINIMIZING System: {}'.format(
                opt['CubeTitle'], system_title))

            # Extract the Parmed structure and synchronize it with the last MD stage state
            parmed_structure = mdrecord.get_parmed(sync_stage_name='last')

            # Run the MD simulation
            new_mdstate = md_simulation(mdstate, parmed_structure, opt)

            # Update the flask coordinates
            flask.SetCoords(new_mdstate.get_oe_positions())
            mdrecord.set_flask(flask)

            data_fn = os.path.basename(mdrecord.cwd) + '_' + opt['system_title'] + '_' + str(
                opt['system_id']) + '-' + opt['suffix'] + '.tar.gz'

            if not mdrecord.add_new_stage(self.title,
                                          MDStageTypes.MINIMIZATION,
                                          flask,
                                          new_mdstate,
                                          data_fn,
                                          append=opt['save_md_stage'],
                                          log=opt['str_logger']):
                raise ValueError("Problems adding the new Minimization Stage")

            self.success.emit(mdrecord.get_record)

            del mdrecord

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            self.opt['Logger'].info('Exception {} {}'.format(
                str(e), self.title))
            self.log.error(traceback.format_exc())
            self.failure.emit(record)

        return
class MDComponentCube(RecordPortsMixin, ComputeCube):
    """Compute cube that wraps the input system into an MDComponents object.

    The container is built from the design unit stored on the record when
    present, otherwise from the primary molecule. The resulting components,
    the flask title and a progressive flask id are written on the record,
    which is then emitted on the success port.
    """

    title = "MD Setting"
    # version = "0.1.4"
    classification = [["System Preparation"]]
    tags = ['Protein']
    description = """
    This Cube is used to componentize the cube input system.
    The cube detects if a Design Unit (DU) is present on the record
    and it will extract the DU components in an ad-hoc container
    (MDComponents). If the DU is not found on the input record,
    the cube will try to create a DU by using the primary molecule
    present on the record; if it fails the primary molecule will be
    split in components by using a more canonical splitting function.
    """

    uuid = "b85d652f-188a-4cc0-aefd-35c98e737f8d"

    # Override defaults for some parameters
    parameter_overrides = {
        "memory_mb": {"default": 14000},
        "spot_policy": {"default": "Prohibited"},
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    flask_title = parameters.StringParameter(
        'flask_title',
        default='',
        help_text='Flask Title')

    multiple_flasks = parameters.BooleanParameter(
        'multiple_flasks',
        default=False,
        help_text="If Checked/True multiple flasks will be allowed")

    def begin(self):
        """Initialize the option dictionary and the flask counter."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log
        self.count = 0
        self.opt['CubeTitle'] = self.title

    def process(self, record, port):
        """Build the MD components for ``record`` and emit it.

        A second record is rejected unless ``multiple_flasks`` is set. Any
        exception is logged and the input ``record`` is emitted on the
        failure port.

        Args:
            record: input record carrying a design unit or a primary
                molecule.
            port: name of the input port the record arrived on (unused).
        """
        try:
            if self.count > 0 and not self.opt['multiple_flasks']:
                raise ValueError("Multiple Flasks have been Detected")

            name = self.opt['flask_title']

            # Pick the object to componentize and the title used when
            # neither the cube parameter nor the object provide one
            if record.has_value(Fields.design_unit_from_spruce):
                source = record.get_value(Fields.design_unit_from_spruce)
                self.opt['Logger'].info("[{}] Design Unit Detected".format(
                    self.title))
                fallback_name = 'Flask'
            else:
                # The extended protein is already prepared to MD standard
                if not record.has_value(Fields.primary_molecule):
                    raise ValueError("Missing Primary Molecule field")
                source = record.get_value(Fields.primary_molecule)
                fallback_name = 'protein'

            if not name:
                # At most the first 12 characters of the title are used
                name = source.GetTitle()[0:12] or fallback_name

            md_components = MDComponents(source, components_title=name)

            # self.opt['Logger'].info(md_components.get_info)

            record.set_value(Fields.md_components, md_components)
            record.set_value(Fields.title, name)
            record.set_value(Fields.flaskid, self.count)

            self.count += 1

            self.success.emit(record)

        except Exception as err:
            print("Failed to complete", str(err), flush=True)
            self.opt['Logger'].info('Exception {} {}'.format(
                str(err), self.title))
            self.log.error(traceback.format_exc())
            self.failure.emit(record)

        return
class SolvationCube(RecordPortsMixin, ComputeCube):
    """Compute cube that solvates the MD components found on a record.

    The flask built from ``Fields.md_components`` is passed to
    ``packmol.oesolvate``. The returned solvent is split into water and
    non-water molecules, and water, solvent and ions are merged into the
    corresponding MD components. The box vectors read from the solvated
    system are stored on the components, and the updated components, flask
    and title are written on the record before it is emitted.
    """

    title = "Solvation Packmol"
    # version = "0.1.4"
    classification = [["System Preparation"]]
    tags = ['Complex', 'Protein', 'Ligand', 'Solvation']
    description = """
    The solvation cube solvates a given solute input system by a
    periodic box of a solvent or a selected mixture of solvents.
    The solvents can be specified by comma separated smiles strings
    of each solvent component or selected keywords like tip3p for
    tip3p water geometry. For each component the user needs to specify
    its molar fractions as well. The solution can be neutralized by
    adding counter-ions. In addition, the ionic solution strength can
    be set adding salt. The cube requires a record as input with a
    solute molecule to solvate and produces an output record with the
    solvated solute.
    """

    uuid = "2e6130f6-2cba-48a4-9ef3-351a2970258a"

    # Override defaults for some parameters
    parameter_overrides = {
        "memory_mb": {"default": 14000},
        "spot_policy": {"default": "Allowed"},
        "prefetch_count": {"default": 1},  # 1 molecule at a time
        "item_count": {"default": 1}  # 1 molecule at a time
    }

    density = parameters.DecimalParameter(
        'density',
        default=1.03,
        help_text="Solution density in g/ml")

    padding_distance = parameters.DecimalParameter(
        'padding_distance',
        default=8.0,
        help_text="The padding distance between the solute and the box edge in A")

    distance_between_atoms = parameters.DecimalParameter(
        'distance_between_atoms',
        default=2.0,
        help_text="The minimum distance between atoms in A")

    # A space was missing between the concatenated literals ("stringse.g.")
    solvents = parameters.StringParameter(
        'solvents',
        default='tip3p',
        help_text='Select solvents. The solvents are specified as comma separated smiles strings '
                  'e.g. [H]O[H], C(Cl)(Cl)Cl, CS(=O)C or special keywords like tip3p')

    # A space was missing between the concatenated literals ("specifiedas")
    molar_fractions = parameters.StringParameter(
        'molar_fractions',
        default='1.0',
        help_text="Molar fractions of each solvent components. The molar fractions are specified "
                  "as comma separated molar fractions strings e.g. 0.5,0.2,0.3")

    verbose = parameters.BooleanParameter(
        'verbose',
        default=False,
        help_text='Output Packmol log')

    geometry = parameters.StringParameter(
        'geometry',
        default='box',
        choices=['box', 'sphere'],
        help_text="Geometry selection: box or sphere. Sphere cannot be used as periodic system "
                  "along with MD simulation")

    close_solvent = parameters.BooleanParameter(
        'close_solvent',
        default=False,
        help_text="If Checked/True solvent molecules will be placed very close to the solute")

    salt = parameters.StringParameter(
        'salt',
        default='[Na+], [Cl-]',
        help_text='Salt type. The salt is specified as list of smiles strings. '
                  'Each smiles string is the salt component dissociated in the '
                  'solution e.g. Na+, Cl-')

    salt_concentration = parameters.DecimalParameter(
        'salt_concentration',
        default=50.0,
        help_text="Salt concentration in millimolar")

    # A space was missing between the concatenated literals ("onthe")
    neutralize_solute = parameters.BooleanParameter(
        'neutralize_solute',
        default=True,
        help_text='Neutralize the solute by adding Na+ and Cl- counter-ions based on '
                  'the solute formal charge')

    def begin(self):
        """Expose the cube arguments as a dictionary and attach the logger."""
        self.opt = vars(self.args)
        self.opt['Logger'] = self.log

    def process(self, record, port):
        """Solvate the flask stored on ``record`` and emit the updated record.

        ``molar_fractions``, ``density`` and ``solvents`` fields found on
        the record override the corresponding cube parameters. Any exception
        is logged and the input ``record`` is emitted on the failure port.

        Args:
            record: input record; must carry ``Fields.md_components``.
            port: name of the input port the record arrived on (unused).
        """
        try:
            opt = dict(self.opt)

            if not record.has_value(Fields.md_components):
                raise ValueError("Missing the MD Components Field")

            md_components = record.get_value(Fields.md_components)

            solute, map_comp = md_components.create_flask

            if not record.has_value(Fields.title):
                self.log.warn("Missing Title field")
                solute_title = solute.GetTitle()[0:12]
            else:
                solute_title = record.get_value(Fields.title)

            self.log.info("[{}] solvating flask {}".format(
                self.title, solute_title))

            # Update cube simulation parameters
            for field in record.get_fields(include_meta=True):
                field_name = field.get_name()
                if field_name in ['molar_fractions', 'density', 'solvents']:
                    rec_value = record.get_value(field)
                    if field_name == 'molar_fractions':
                        opt[field_name] = str(rec_value)
                    else:
                        opt[field_name] = rec_value
                    opt['Logger'].info(
                        "{} Updating parameters for molecule: {} {} = {}".
                        format(self.title, solute.GetTitle(), field_name,
                               rec_value))

            # Set the flag to return the solvent molecule components
            opt['return_components'] = True

            # Solvate the system
            sol_system, solvent, salt, counter_ions = packmol.oesolvate(
                solute, **opt)

            # Separate the Water from the solvent
            pred_water = oechem.OEIsWater(checkHydrogens=True)
            water = oechem.OEMol()
            oechem.OESubsetMol(water, solvent, pred_water)

            if water.NumAtoms():
                if md_components.has_water:
                    water_comp = md_components.get_water
                    if not oechem.OEAddMols(water_comp, water):
                        raise ValueError(
                            "Cannot add the MD Component Water and the Packmol Water")
                    md_components.set_water(water_comp)
                else:
                    md_components.set_water(water)

                # Whatever is not water stays in the solvent component
                pred_not_water = oechem.OENotAtom(pred_water)
                solvent_not_water = oechem.OEMol()
                oechem.OESubsetMol(solvent_not_water, solvent, pred_not_water)

                if solvent_not_water.NumAtoms():
                    solvent = solvent_not_water
                else:
                    solvent = oechem.OEMol()

            self.log.info(
                "[{}] Solvated simulation flask {} yielding {} atoms overall".
                format(self.title, solute_title, sol_system.NumAtoms()))
            sol_system.SetTitle(solute.GetTitle())

            # Merge salt into the counter ions so that a single ion
            # component is handled below
            if salt is not None and counter_ions is not None:
                if not oechem.OEAddMols(counter_ions, salt):
                    raise ValueError(
                        "Cannot add the salt component and the counter ion component")
            elif salt is not None:
                counter_ions = salt

            if md_components.has_solvent:
                solvent_comp = md_components.get_solvent
                if not oechem.OEAddMols(solvent_comp, solvent):
                    raise ValueError(
                        "Cannot add the MD Component solvent and the Packmol Solvent")
            else:
                solvent_comp = solvent

            if solvent_comp.NumAtoms():
                md_components.set_solvent(solvent_comp)

            if counter_ions is not None:
                if md_components.has_counter_ions:
                    counter_ions_comp = md_components.get_counter_ions
                    if not oechem.OEAddMols(counter_ions_comp, counter_ions):
                        raise ValueError(
                            "Cannot add the MD Component counter ions and the Packmol counter ions")
                else:
                    counter_ions_comp = counter_ions

                md_components.set_counter_ions(counter_ions_comp)

            # Set Box Vectors
            vec_data = pack_utils.getData(sol_system, tag='box_vectors')
            box_vec = pack_utils.decodePyObj(vec_data)
            md_components.set_box_vectors(box_vec)

            flask, map_comp = md_components.create_flask
            record.set_value(Fields.md_components, md_components)
            record.set_value(Fields.flask, flask)
            record.set_value(Fields.title, solute_title)

            self.success.emit(record)

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            self.opt['Logger'].info('Exception {} {}'.format(
                str(e), self.title))
            self.log.error(traceback.format_exc())
            self.failure.emit(record)

        return