def test_false(self):
    """With ``switch=False`` a single record must end up on the failure port."""
    runner = self.test_runner

    # Parameters are configured before the cube is started
    runner.set_parameters(switch=False)
    runner.start()

    runner.cube.process(OERecord(), "intake")
    runner.finalize()

    outputs = runner.outputs
    self.assertEqual(outputs["success"].qsize(), 0)
    self.assertEqual(outputs["failure"].qsize(), 1)
def setUp(self):
    """Load the reference MD record and change into the test data directory.

    The dataset ``mdrecord.oedb`` in ``FILE_DIR`` must contain exactly one
    record. It is exposed both as a plain ``OERecord`` copy (``self.record``)
    and wrapped in the MD record API (``self.mdrecord``). The previous working
    directory is remembered in ``self.cwd`` before changing to ``FILE_DIR``.
    """
    fname = os.path.join(FILE_DIR, "mdrecord.oedb")

    ifs = oechem.oeifstream(fname)
    try:
        # Materialize every record while the stream is still open
        records = list(read_records(ifs))
    finally:
        # The input stream used to be left open for the whole test run
        ifs.close()

    self.assertEqual(len(records), 1)

    self.record = OERecord(records[0])
    self.mdrecord = MDDataRecord(records[0])

    self.cwd = os.getcwd()
    os.chdir(FILE_DIR)
def test_set_parmed(self):
    """Deleting and then re-setting the Parmed structure toggles ``has_parmed``."""
    mdrec_copy = MDDataRecord(OERecord(self.record))

    # Keep the structure so it can be put back after the field is removed
    structure = mdrec_copy.get_parmed()

    mdrec_copy.delete_field(Fields.pmd_structure)
    self.assertFalse(mdrec_copy.has_parmed)

    mdrec_copy.set_parmed(structure, sync_stage_name='last')
    self.assertTrue(mdrec_copy.has_parmed)
def process(self, initialRecord, port):
    """Prepare one ligand record and emit it on the success port.

    The incoming record is stored untouched as a sub-record of a fresh
    record; the ligand is stripped of interaction hints, style and PDB data,
    sanitized, given the configured residue name and tagged with a running
    ligand id. Any exception is logged and the *incoming* record is emitted
    on the failure port instead.

    Parameters
    ----------
    initialRecord: OERecord
        The record that must carry the primary molecule (the ligand)
    port: String
        The input port name; not used by this method
    """
    try:
        if not initialRecord.has_value(Fields.primary_molecule):
            raise ValueError("Missing Primary Molecule field")

        ligand = initialRecord.get_value(Fields.primary_molecule)

        # Keep the complete initial record as a sub-record, so that it can be
        # restored when the conformer runs are gathered
        record = OERecord()
        record.set_value(Fields.ligInit_rec, initialRecord)

        # Units are in Dalton
        mol_weight = oechem.OECalculateMolecularWeight(ligand)
        if mol_weight > 1500.0:
            raise ValueError("[{}] The molecule {} seems to have a large molecular weight for a "
                             "ligand: {:.2f} Da)".format(self.title,
                                                         ligand.GetTitle(),
                                                         mol_weight))

        # Strip Interaction Hint Container, Style and PDB Data
        oechem.OEDeleteInteractionsHintSerializationData(ligand)
        oechem.OEDeleteInteractionsHintSerializationIds(ligand)
        oechem.OEClearStyle(ligand)
        oechem.OEClearPDBData(ligand)

        # Ligand sanitation
        ligand = oeommutils.sanitizeOEMolecule(ligand)

        # Untitled ligands fall back to a generic name
        title = ligand.GetTitle()
        lig_title = 'LIG' if title == "" else title
        record.set_value(Fields.ligand_name, lig_title)

        # Every atom gets the residue name selected in the cube parameters
        res_name = self.args.lig_res_name
        for atom in ligand.GetAtoms():
            res = oechem.OEAtomGetResidue(atom)
            res.SetName(res_name)
            oechem.OEAtomSetResidue(atom, res)

        record.set_value(Fields.primary_molecule, ligand)
        record.set_value(Fields.ligid, self.ligand_count)

        self.success.emit(record)

        # Counters are only advanced once the record has been emitted
        self.ligand_count += 1
        self.max_runs += ligand.NumConfs()

    except Exception as e:
        print("Failed to complete", str(e), flush=True)
        self.opt['Logger'].info('Exception {} {}'.format(str(e), self.title))
        self.log.error(traceback.format_exc())
        self.failure.emit(initialRecord)
def test_true(self):
    """With ``switch=True`` every processed record must reach the success port."""
    runner = self.test_runner
    num_to_send = 10

    # set_parameters has to come before start; start runs the cube's begin function
    runner.set_parameters(switch=True)
    runner.start()

    # Records are handed straight to the cube's process function
    for _ in range(num_to_send):
        runner.cube.process(OERecord(), "intake")

    # finalize triggers the end step of the cube
    runner.finalize()

    # Everything sent is expected on success, nothing on failure
    outputs = runner.outputs
    self.assertEqual(outputs["success"].qsize(), num_to_send)
    self.assertEqual(outputs["failure"].qsize(), 0)
def test_success(self):
    """Run the cube on a ligand whose partial charges were zeroed.

    The ligand is read from ``lig_CAT13a_chg.oeb.gz``; a copy with all
    partial charges set to zero is sent through the cube. Exactly one record
    is expected on the success port and none on the failure port, and the
    partial charges of the emitted ligand are compared atom by atom with the
    ones read from the file.
    """
    print('Testing cube:', self.cube.name)

    # File name of a charged ligand
    lig_fname = os.path.join(FILE_DIR, "lig_CAT13a_chg.oeb.gz")

    # Read OEMol molecule
    ligand = oechem.OEMol()
    with oechem.oemolistream(lig_fname) as ifs:
        oechem.OEReadMolecule(ifs, ligand)

    ligand_copy = ligand.CreateCopy()

    # Set the partial charge to zero
    for at in ligand_copy.GetAtoms():
        at.SetPartialCharge(0.0)

    ligand_record = OERecord()
    ligand_record.set_value(Fields.primary_molecule, ligand_copy)
    ligand_record.set_value(Fields.flaskid, 0)
    ligand_record.set_value(Fields.title, ligand_copy.GetTitle())

    # Process the molecules
    self.cube.process(ligand_record, self.cube.intake.name)

    # Assert that one molecule was emitted on the success port
    self.assertEqual(self.runner.outputs['success'].qsize(), 1)
    # Assert that zero molecules were emitted on the failure port
    self.assertEqual(self.runner.outputs['failure'].qsize(), 0)

    # Check out ligand
    out_record = self.runner.outputs["success"].get()
    out_ligand = out_record.get_value(Fields.primary_molecule)

    # Loop through atoms and make sure partial charges were set.
    # The getter must be *called* on the output atom: comparing a float with
    # the bound method itself is always "not equal" and made the check vacuous.
    # NOTE(review): the reference is the ligand as read from file, not the
    # zeroed copy that was sent to the cube - confirm this is the intended
    # comparison now that the assertion is effective.
    for iat, oat in zip(ligand.GetAtoms(), out_ligand.GetAtoms()):
        self.assertNotEqual(iat.GetPartialCharge(), oat.GetPartialCharge())
def test_add_new_stage(self):
    """Adding a stage called 'Testing' yields four stages, the new one retrievable by name."""
    mdrec_copy = MDDataRecord(OERecord(self.record))

    # Topology and state are borrowed from the reference record
    stage_topology = self.mdrecord.get_stage_topology()
    stage_state = self.mdrecord.get_stage_state()

    added = mdrec_copy.add_new_stage("Testing",
                                     MDStageTypes.FEC,
                                     stage_topology,
                                     stage_state,
                                     "test.tar.gz",
                                     log='TestingLogs')
    self.assertTrue(added)

    self.assertEqual(len(mdrec_copy.get_value(Fields.md_stages)), 4)

    testing_stage = mdrec_copy.get_stage_by_name(stg_name='Testing')

    self.assertEqual(testing_stage.get_value(Fields.stage_name), 'Testing')
    self.assertEqual(testing_stage.get_value(Fields.stage_type), MDStageTypes.FEC)
def __iter__(self):
    """Yield the records this source produces.

    If input datasets are given, all of their records are yielded in order.
    Otherwise the first supplied ``.tpr`` file is read and a single record
    carrying its bytes and the prefix name is yielded; an empty prefix name
    is replaced by ``"PROT"`` on ``self.args``.

    Raises
    ------
    ValueError
        If neither a dataset nor a ``.tpr`` file has been supplied
    """
    datasets = list(self.args.data_in)
    files = list(self.args.tpr)

    # Datasets take precedence over .tpr files
    if datasets:
        for dataset in datasets:
            yield from dataset.records()
        return

    if not files:
        raise ValueError(
            "A Gromacs input .tpr file or a restart dataset is required")

    if self.args.prefix_name == "":
        self.args.prefix_name = "PROT"

    # Only the first .tpr file is consumed; any further file is ignored
    tpr_file = files[0]

    with TemporaryPath(suffix=".tpr") as path:
        tpr_file.copy_to(path)
        with open(path, "rb") as f:
            tpr_bytes = f.read()

    record = OERecord()
    record.set_value(Fields.tpr_field, tpr_bytes)
    record.set_value(Fields.prefix_name_field, self.args.prefix_name)

    yield record
def data_trajectory_extraction(ctx, name, only):
    """Download the remote data referenced by each record and write new records.

    For every record in ``ctx.obj['records']`` a copy is made in which the
    integer ids pointing at session resources are replaced by the downloaded
    data, according to the selections in ``only``:

    * ``stages``: each stage's MD data shard (and trajectory file, if the
      stage has one) is downloaded to a relative file name and the stage
      fields are re-pointed to those file names
    * ``parmed``: the pickled Parmed state is downloaded and stored as a
      Parmed structure on the record
    * ``protein_confs``: the protein trajectory conformers are downloaded and
      stored in the ``OETraj`` sub-record
    * ``a``: all of the above

    The collection field is removed from the copy, which is then written in
    binary format to ``name``.

    Parameters
    ----------
    ctx:
        Context object; ``ctx.obj['session']`` and ``ctx.obj['records']`` are read
    name: String
        The output file name
    only: iterable of String
        The selected extractions; each entry must be one of
        ``a``, ``stages``, ``parmed``, ``protein_confs``

    Raises
    ------
    ValueError
        If ``only`` contains an unknown value or a record has no collection field
    """
    check_only = ['a', 'stages', 'parmed', 'protein_confs']

    for v in only:
        if v not in check_only:
            raise ValueError(
                "The only keyword value is not recognized {}. Option available: {}"
                .format(only, check_only[1:]))

    session = ctx.obj['session']

    # Fields holding the ids of the remote resources
    mddata_id_field = OEField("MDData_OPLMD", Types.Int)
    trj_id_field = OEField("Trajectory_OPLMD", Types.Int)
    parmed_id_field = OEField('Structure_Parmed_OPLMD', Types.Int)
    oetraj_field = OEField('OETraj', Types.Record)
    prot_traj_id_field = OEField("ProtTraj_OPLMD", Types.Int)

    ofs = oechem.oeofstream(name)

    # The output stream is closed even if a record fails half way through;
    # previously it was only closed when every record succeeded
    try:
        for record in tqdm(ctx.obj['records']):

            new_record = OERecord(record)

            if not record.has_field(Fields.collection):
                raise ValueError(
                    "No Collection field has been found in the record")

            collection_id = record.get_value(Fields.collection)
            collection = session.get_resource(ShardCollection, collection_id)

            new_stages = []

            if 'a' in only or 'stages' in only:

                mdrecord = MDDataRecord(record)

                stages = mdrecord.get_stages
                system_title = mdrecord.get_title
                sys_id = mdrecord.get_flask_id

                for stage in stages:

                    stg_type = stage.get_value(Fields.stage_type)
                    new_stage = OERecord(stage)

                    # The temporary directory is only used for its base name,
                    # which becomes the prefix of the downloaded file names
                    with TemporaryDirectory() as output_directory:

                        data_fn = os.path.basename(
                            output_directory) + '_' + system_title + '_' + str(
                                sys_id) + '-' + stg_type + '.tar.gz'

                        shard_id = stage.get_value(mddata_id_field)
                        shard = session.get_resource(Shard(collection=collection),
                                                     shard_id)
                        shard.download_to_file(data_fn)

                        new_stage.delete_field(mddata_id_field)
                        new_stage.set_value(Fields.mddata, data_fn)

                        if stage.has_field(trj_id_field):

                            trj_field = stage.get_field("Trajectory_OPLMD")

                            # The description attribute of the field meta data
                            # is carried over to the new trajectory field
                            trj_meta = trj_field.get_meta()
                            md_engine = trj_meta.get_attribute(
                                Meta.Annotation.Description)

                            trj_id = stage.get_value(trj_field)
                            trj_fn = os.path.basename(
                                output_directory) + '_' + system_title + '_' + str(
                                    sys_id) + '-' + stg_type + '_traj' + '.tar.gz'

                            resource = session.get_resource(File, trj_id)
                            resource.download_to_file(trj_fn)

                            trj_meta = OEFieldMeta()
                            trj_meta.set_attribute(Meta.Annotation.Description,
                                                   md_engine)
                            new_trj_field = OEField(Fields.trajectory.get_name(),
                                                    Fields.trajectory.get_type(),
                                                    meta=trj_meta)

                            new_stage.delete_field(trj_id_field)
                            new_stage.set_value(new_trj_field, trj_fn)

                    new_stages.append(new_stage)

                new_record.set_value(Fields.md_stages, new_stages)

            if 'a' in only or 'parmed' in only:

                if record.has_field(parmed_id_field):

                    pmd_id = record.get_value(parmed_id_field)
                    shard = session.get_resource(Shard(collection=collection),
                                                 pmd_id)

                    with TemporaryDirectory() as output_directory:

                        parmed_fn = os.path.join(output_directory,
                                                 "parmed.pickle")
                        shard.download_to_file(parmed_fn)

                        # NOTE(review): unpickling downloaded data executes
                        # arbitrary code if the shard content is not trusted
                        with open(parmed_fn, 'rb') as f:
                            parm_dic = pickle.load(f)

                        pmd_structure = parmed.structure.Structure()
                        pmd_structure.__setstate__(parm_dic)

                    new_record.delete_field(parmed_id_field)
                    new_record.set_value(Fields.pmd_structure, pmd_structure)

            if 'a' in only or 'protein_confs' in only:

                if record.has_field(oetraj_field):

                    oetrajrec = record.get_value(oetraj_field)
                    prot_conf_id = oetrajrec.get_value(prot_traj_id_field)

                    shard = session.get_resource(Shard(collection=collection),
                                                 prot_conf_id)

                    with TemporaryDirectory() as output_directory:

                        protein_fn = os.path.join(output_directory,
                                                  "prot_traj_confs.oeb")
                        shard.download_to_file(protein_fn)

                        protein_conf = oechem.OEMol()
                        with oechem.oemolistream(protein_fn) as ifs:
                            oechem.OEReadMolecule(ifs, protein_conf)

                    oetrajrec.delete_field(prot_traj_id_field)
                    oetrajrec.set_value(Fields.protein_traj_confs, protein_conf)
                    new_record.set_value(oetraj_field, oetrajrec)

            new_record.delete_field(Fields.collection)

            OEWriteRecord(ofs, new_record, fmt='binary')
    finally:
        ofs.close()
def test_delete_parmed(self):
    """After ``delete_parmed`` the copied record must not report a Parmed structure."""
    mdrec_copy = MDDataRecord(OERecord(self.record))

    # NOTE(review): accessed without a call - presumably ``delete_parmed`` is a
    # property whose access performs the deletion; confirm against MDDataRecord
    mdrec_copy.delete_parmed

    self.assertFalse(mdrec_copy.has_parmed)
class MDRecordTests(unittest.TestCase):
    """
    Testing MD Record API

    Every test runs against the single record stored in ``mdrecord.oedb``
    inside ``FILE_DIR``. ``self.record`` is a plain copy of that record and
    ``self.mdrecord`` wraps it with the MD record API.
    """

    def setUp(self):
        """Read the reference record and run the test from ``FILE_DIR``."""
        fname = os.path.join(FILE_DIR, "mdrecord.oedb")

        ifs = oechem.oeifstream(fname)
        try:
            # Materialize the records while the stream is still open
            records = list(read_records(ifs))
        finally:
            # The stream used to stay open for the whole test run
            ifs.close()

        self.assertEqual(len(records), 1)

        self.record = OERecord(records[0])
        self.mdrecord = MDDataRecord(records[0])

        # The working directory was saved but never restored, so the chdir
        # leaked into whatever ran after this test; undo it on cleanup
        self.cwd = os.getcwd()
        self.addCleanup(os.chdir, self.cwd)
        os.chdir(FILE_DIR)

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_primary(self):
        mol = self.mdrecord.get_value(Fields.primary_molecule)
        self.assertEqual(mol.NumAtoms(), self.mdrecord.get_primary.NumAtoms())

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_primary(self):
        mol = self.mdrecord.get_value(Fields.primary_molecule)
        self.assertTrue(self.mdrecord.set_primary(mol))

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_flask(self):
        mol = self.mdrecord.get_value(Fields.flask)
        self.assertEqual(mol.NumAtoms(), self.mdrecord.get_flask.NumAtoms())

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_flask(self):
        mol = self.mdrecord.get_value(Fields.flask)
        self.assertTrue(self.mdrecord.set_flask(mol))

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_flaskid(self):
        # Local renamed so that the builtin ``id`` is not shadowed
        flask_id = self.mdrecord.get_value(Fields.flaskid)
        self.assertEqual(flask_id, 0)

    @pytest.mark.travis
    @pytest.mark.local
    def test_has_flaskid(self):
        self.assertTrue(self.mdrecord.has_flask_id)

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_flaskid(self):
        self.mdrecord.set_flask_id(5)
        self.assertEqual(self.mdrecord.get_flask_id, 5)

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_lig_id(self):
        lig_id = self.mdrecord.get_value(Fields.ligid)
        self.assertEqual(lig_id, 0)

    @pytest.mark.travis
    @pytest.mark.local
    def test_has_lig_id(self):
        self.assertTrue(self.mdrecord.has_lig_id)

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_lig_id(self):
        self.mdrecord.set_lig_id(5)
        self.assertEqual(self.mdrecord.get_lig_id, 5)

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_conf_id(self):
        conf_id = self.mdrecord.get_value(Fields.confid)
        self.assertEqual(conf_id, 0)

    @pytest.mark.travis
    @pytest.mark.local
    def test_has_conf_id(self):
        self.assertTrue(self.mdrecord.has_conf_id)

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_conf_id(self):
        self.mdrecord.set_conf_id(5)
        self.assertEqual(self.mdrecord.get_conf_id, 5)

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_title(self):
        title = self.mdrecord.get_value(Fields.title)
        self.assertEqual(title, 'pPRT_ltoluene')

    @pytest.mark.travis
    @pytest.mark.local
    def test_has_tile(self):
        self.assertTrue(self.mdrecord.has_title)

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_title(self):
        self.mdrecord.set_title("Pippo")
        self.assertEqual(self.mdrecord.get_title, 'Pippo')

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_last_stage(self):
        last_stage = self.mdrecord.get_last_stage
        self.assertEqual(last_stage.get_value(Fields.stage_name), 'Production')
        self.assertEqual(last_stage.get_value(Fields.stage_type), 'NPT')

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stage_by_name(self):
        # Called without a stage name, the 'Production' stage is expected
        last_stage = self.mdrecord.get_stage_by_name()
        self.assertEqual(last_stage.get_value(Fields.stage_name), 'Production')
        self.assertEqual(last_stage.get_value(Fields.stage_type), 'NPT')

        param_stage = self.mdrecord.get_stage_by_name(
            stg_name='System Parametrization')
        self.assertEqual(param_stage.get_value(Fields.stage_name),
                         'System Parametrization')
        self.assertEqual(param_stage.get_value(Fields.stage_type), 'SETUP')

        param_stage = self.mdrecord.get_stage_by_name(
            stg_name='System Minimization')
        self.assertEqual(param_stage.get_value(Fields.stage_name),
                         'System Minimization')
        self.assertEqual(param_stage.get_value(Fields.stage_type),
                         'MINIMIZATION')

        with self.assertRaises(ValueError):
            self.mdrecord.get_stage_by_name('Error')

    # @pytest.mark.local
    # def test_delete_stage_by_name(self):
    #     new_record = OERecord(self.record)
    #     new_mdrecord = MDDataRecord(new_record)
    #
    #     new_mdrecord.delete_stage_by_name(stg_name='System Minimization')
    #     self.assertFalse(new_mdrecord.has_stage_name('System Minimization'))
    #     self.assertEqual(len(new_mdrecord.get_stages), 2)
    # TODO THIS IS GOING TO DELETE FILES LOCALLY KEEP DISABLED

    @pytest.mark.travis
    @pytest.mark.local
    def test_has_stage_name(self):
        self.assertTrue(self.mdrecord.has_stage_name('Production'))
        self.assertFalse(self.mdrecord.has_stage_name('Error'))

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stage_by_idx(self):
        with self.assertRaises(ValueError):
            self.mdrecord.get_stage_by_idx(5)
        self.assertEqual(
            self.mdrecord.get_stage_by_idx(0).get_value(Fields.stage_name),
            'System Parametrization')

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stage_state(self):
        last_stage = self.mdrecord.get_last_stage

        mddata_fn = os.path.join(FILE_DIR, last_stage.get_value(Fields.mddata))

        # The expected state is unpacked directly from the stage archive
        with TemporaryDirectory() as out_directory:
            with tarfile.open(mddata_fn) as tar:
                tar.extractall(path=out_directory)

            state_fn = os.path.join(out_directory, MDFileNames.state)

            with open(state_fn, 'rb') as f:
                md_state = pickle.load(f)

        self.assertEqual(md_state.get_positions(),
                         self.mdrecord.get_stage_state().get_positions())
        self.assertEqual(md_state.get_velocities(),
                         self.mdrecord.get_stage_state().get_velocities())
        self.assertEqual(md_state.get_box_vectors(),
                         self.mdrecord.get_stage_state().get_box_vectors())

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stage_topology(self):
        par_stage = self.mdrecord.get_stage_by_idx(0)

        mddata_fn = os.path.join(FILE_DIR, par_stage.get_value(Fields.mddata))

        # The expected topology is unpacked directly from the stage archive
        with TemporaryDirectory() as out_directory:
            with tarfile.open(mddata_fn) as tar:
                tar.extractall(path=out_directory)

            topology_fn = os.path.join(out_directory, MDFileNames.topology)

            topology_mol = oechem.OEMol()

            with oechem.oemolistream(topology_fn) as ifs:
                oechem.OEReadMolecule(ifs, topology_mol)

        topology = self.mdrecord.get_stage_topology(
            stg_name='System Parametrization')

        for mol_at, top_at in zip(topology_mol.GetAtoms(),
                                  topology.GetAtoms()):
            self.assertEqual(mol_at.GetAtomicNum(), top_at.GetAtomicNum())

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stage_info(self):
        last_stage = self.mdrecord.get_last_stage
        info = last_stage.get_value(Fields.log_data)
        self.assertEqual(info, self.mdrecord.get_stage_info())

        min_stage = self.mdrecord.get_stage_by_name(
            stg_name='System Minimization')
        info = min_stage.get_value(Fields.log_data)
        self.assertEqual(
            info, self.mdrecord.get_stage_info(stg_name='System Minimization'))

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stage_trajectory(self):
        self.assertTrue(os.path.isfile(self.mdrecord.get_stage_trajectory()))

    @pytest.mark.travis
    @pytest.mark.local
    def test_add_new_stage(self):
        new_record = OERecord(self.record)
        new_mdrecord = MDDataRecord(new_record)

        topology = self.mdrecord.get_stage_topology()
        md_state = self.mdrecord.get_stage_state()

        self.assertTrue(
            new_mdrecord.add_new_stage("Testing",
                                       MDStageTypes.FEC,
                                       topology,
                                       md_state,
                                       "test.tar.gz",
                                       log='TestingLogs'))

        self.assertEqual(len(new_mdrecord.get_value(Fields.md_stages)), 4)

        new_last_stage = new_mdrecord.get_stage_by_name(stg_name='Testing')

        self.assertEqual(new_last_stage.get_value(Fields.stage_name),
                         'Testing')
        self.assertEqual(new_last_stage.get_value(Fields.stage_type),
                         MDStageTypes.FEC)

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stages(self):
        stages = self.mdrecord.get_stages
        self.assertEqual(len(stages), 3)

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_stages_names(self):
        ls_names = [
            'System Parametrization', 'System Minimization', 'Production'
        ]
        stg_names = self.mdrecord.get_stages_names
        self.assertEqual(stg_names, ls_names)

    @pytest.mark.travis
    @pytest.mark.local
    def test_has_stages(self):
        self.assertTrue(self.mdrecord.has_stages)

    @pytest.mark.travis
    @pytest.mark.local
    def test_get_parmed(self):
        pmd = self.mdrecord.get_parmed(sync_stage_name='last')
        self.assertEqual(len(pmd.atoms), 30439)
        self.assertEqual((len(pmd.residues)), 9446)
        self.assertEqual((len(pmd.bonds)), 21178)
        self.assertEqual((len(pmd.angles)), 14069)
        self.assertEqual((len(pmd.dihedrals)), 8028)

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_parmed(self):
        new_record = OERecord(self.record)
        new_mdrecord = MDDataRecord(new_record)

        pmd = new_mdrecord.get_parmed()

        new_mdrecord.delete_field(Fields.pmd_structure)
        self.assertFalse(new_mdrecord.has_parmed)

        new_mdrecord.set_parmed(pmd, sync_stage_name='last')
        self.assertTrue(new_mdrecord.has_parmed)

    @pytest.mark.travis
    @pytest.mark.local
    def test_has_parmed(self):
        self.assertTrue(self.mdrecord.has_parmed)

    @pytest.mark.travis
    @pytest.mark.local
    def test_delete_parmed(self):
        new_record = OERecord(self.record)
        new_mdrecord = MDDataRecord(new_record)

        # NOTE(review): accessed without a call - presumably a property whose
        # access performs the deletion; confirm against MDDataRecord
        new_mdrecord.delete_parmed

        self.assertFalse(new_mdrecord.has_parmed)

    @pytest.mark.travis
    @pytest.mark.local
    def test_protein_traj(self):
        oetraj_record = self.record.get_value(OEField('OETraj', Types.Record))
        prot_mol = oetraj_record.get_value(Fields.protein_traj_confs)

        mdrecord = MDDataRecord(oetraj_record)
        mdprot = mdrecord.get_protein_traj

        self.assertEqual(prot_mol.NumAtoms(), mdprot.NumAtoms())

    @pytest.mark.travis
    @pytest.mark.local
    def test_set_protein_traj(self):
        oetraj_record = self.record.get_value(OEField('OETraj', Types.Record))
        prot_mol = oetraj_record.get_value(Fields.protein_traj_confs)

        mdrecord = MDDataRecord(oetraj_record)

        oetraj_record.delete_field(Fields.protein_traj_confs)

        # With the field removed, reading the protein trajectory must fail
        with self.assertRaises(ValueError):
            mdrecord.get_protein_traj

        self.assertTrue(mdrecord.set_protein_traj(prot_mol))
def add_new_stage(self,
                  stage_name,
                  stage_type,
                  topology,
                  mdstate,
                  data_fn,
                  append=True,
                  log=None,
                  trajectory_fn=None,
                  trajectory_engine=None,
                  trajectory_orion_ui='OrionFile'):
    """
    Create a new MD stage and store it in the MD stages of the record.

    The topology and the pickled MD state are packed into the gzipped tar
    archive ``data_fn``, which is then passed to ``utils.upload_data``; the
    returned value is stored in the stage. If a trajectory is given it is
    passed to ``utils.upload_file`` and stored in a trajectory field whose
    meta data description is set to the engine name.

    Parameters
    ----------
    stage_name: String
        The name of the new MD stage
    stage_type: String
        The type of the MD stage e.g. SETUP, MINIMIZATION etc.
    topology: OEMol
        The topology written into the stage archive
    mdstate: MDState
        The MD state (positions, velocities and box vectors) pickled
        into the stage archive
    data_fn: String
        The file name of the archive created for the stage; it is also used
        as the shard name of the uploaded data
    append: Bool
        If True the new stage is appended to the existing MD stages,
        otherwise the last stage is replaced by the new one
    log: String or None
        Log info stored in the stage
    trajectory_fn: String, Int or None
        The trajectory name for a local run or the id in Orion associated
        with the new MD stage
    trajectory_engine: String or None
        The MD engine that produced the trajectory.
        Possible names: OpenMM or Gromacs
    trajectory_orion_ui: String
        The trajectory name to be displayed in the Orion UI

    Returns
    -------
    boolean: Bool
        True if the MD stage has been created

    Raises
    ------
    IOError
        If ``trajectory_fn`` is given but is not an existing file
    ValueError
        If the stage name is already used by a stage that is not going to
        be replaced
    """
    new_stage = OERecord()
    new_stage.set_value(Fields.stage_name, stage_name)
    new_stage.set_value(Fields.stage_type, stage_type)

    if log is not None:
        new_stage.set_value(Fields.log_data, log)

    # Topology and state are written to scratch files and archived in data_fn
    with TemporaryDirectory() as output_directory:
        top_fn = os.path.join(output_directory, MDFileNames.topology)
        with oechem.oemolostream(top_fn) as ofs:
            oechem.OEWriteConstMolecule(ofs, topology)

        state_fn = os.path.join(output_directory, MDFileNames.state)
        with open(state_fn, 'wb') as f:
            pickle.dump(mdstate, f)

        with tarfile.open(data_fn, mode='w:gz') as archive:
            for member_fn in (top_fn, state_fn):
                archive.add(member_fn, arcname=os.path.basename(member_fn))

    has_trajectory = trajectory_fn is not None

    if has_trajectory:
        if not os.path.isfile(trajectory_fn):
            raise IOError(
                "The trajectory file has not been found: {}".format(
                    trajectory_fn))

        # The engine name travels in the meta data of the trajectory field
        trj_meta = OEFieldMeta()
        trj_meta.set_attribute(Meta.Annotation.Description, trajectory_engine)
        trj_field = OEField(Fields.trajectory.get_name(),
                            Fields.trajectory.get_type(),
                            meta=trj_meta)

    has_stages = self.rec.has_field(Fields.md_stages)

    if has_stages:
        stage_names = self.get_stages_names
        # A name clash is only tolerated when the clashing stage is the last
        # one and it is about to be replaced (append is False)
        if stage_name in stage_names and (append
                                          or stage_name != stage_names[-1]):
            raise ValueError(
                "The selected stage name is already present in the MD stages: {}"
                .format(stage_names))

    data_ref = utils.upload_data(data_fn,
                                 collection_id=self.collection_id,
                                 shard_name=data_fn)
    new_stage.set_value(Fields.mddata, data_ref)

    if has_trajectory:
        trj_ref = utils.upload_file(trajectory_fn,
                                    orion_ui_name=trajectory_orion_ui)
        new_stage.set_value(trj_field, trj_ref)

    if has_stages:
        stages = self.get_stages
        if append:
            stages.append(new_stage)
        else:
            self.delete_stage_by_name('last')
            stages[-1] = new_stage
        self.rec.set_value(Fields.md_stages, stages)
    else:
        # First stage ever stored on this record
        self.rec.set_value(Fields.md_stages, [new_stage])

    self.processed[stage_name] = False

    return True
def process(self, record, port):
    """Combine the incoming ligand with the stored MD components.

    Only records arriving on the ``intake`` port are handled. The ligand
    must be single conformer and within 3A of the protein. Components other
    than the ligand and the protein that clash with the ligand are trimmed
    (1A cutoff for metals, 1.5A for everything else). A copy of the MD
    components, titled ``p<components title>_l<ligand title>``, is stored on
    a copy of the record together with ligand, protein and protein name,
    and the copy is emitted on the success port. Any exception is logged and
    the incoming record is emitted on the failure port.

    Parameters
    ----------
    record: OERecord
        The record carrying the ligand as primary molecule
    port: String
        The name of the port the record arrived on
    """
    try:
        if port == 'intake':

            if not record.has_value(Fields.primary_molecule):
                raise ValueError(
                    "Missing the ligand primary molecule field")

            ligand = record.get_value(Fields.primary_molecule)

            if ligand.NumConfs() > 1:
                # NumConfs is the conformer counter used in the check above;
                # the message previously called GetNumConfs, which turned the
                # intended ValueError into an AttributeError
                raise ValueError(
                    "The ligand {} has multiple conformers: {}".format(
                        ligand.GetTitle(), ligand.NumConfs()))

            if not record.has_value(Fields.title):
                self.log.warn(
                    "Missing title field '{}' field; improvising".format(
                        Fields.title.get_name()))
                # Fall back to the first 12 characters of the molecule title
                ligand_title = ligand.GetTitle()[0:12]
            else:
                ligand_title = record.get_value(Fields.title)

            protein = self.md_components.get_protein

            self.md_components.set_ligand(ligand)

            # Check if the ligand is inside the binding site. Cutoff distance 3A
            if not oeommutils.check_shell(ligand, protein, 3):
                raise ValueError(
                    "The Ligand is probably outside the Protein binding site"
                )

            # Remove Steric Clashes between the ligand and the other System components
            for comp_name, comp in self.md_components.get_components.items():

                # Skip clashes between the ligand itself and the protein
                if comp_name in ['ligand', 'protein']:
                    continue

                # Remove Metal clashes if the distance between the metal and the ligand
                # is less than 1A
                elif comp_name == 'metals':
                    metal_del = oeommutils.delete_shell(ligand,
                                                        comp,
                                                        1.0,
                                                        in_out='in')

                    if metal_del.NumAtoms() != comp.NumAtoms():
                        self.opt['Logger'].info(
                            "Detected steric-clashes between the ligand {} and metals"
                            .format(ligand_title))

                        self.md_components.set_metals(metal_del)

                # Remove clashes if the distance between the selected component and the ligand
                # is less than 1.5A
                else:
                    comp_del = oeommutils.delete_shell(ligand,
                                                       comp,
                                                       1.5,
                                                       in_out='in')

                    if comp_del.NumAtoms() != comp.NumAtoms():
                        self.opt['Logger'].info(
                            "Detected steric-clashes between the ligand {} and component {}"
                            .format(ligand_title, comp_name))

                        self.md_components.set_component_by_name(
                            comp_name, comp_del)

            complex_title = 'p' + self.md_components.get_title + '_l' + ligand_title

            mdcomp = self.md_components.copy
            mdcomp.set_title(complex_title)

            # Check Ligand: the ligand stored in the components must have the
            # same SMILES string as the incoming one
            lig_check = mdcomp.get_ligand
            smi_lig_check = oechem.OECreateSmiString(lig_check)
            smi_ligand = oechem.OECreateSmiString(ligand)

            if smi_ligand != smi_lig_check:
                raise ValueError(
                    "Ligand IsoSmiles String check failure: {} vs {}".
                    format(smi_lig_check, smi_ligand))

            # the ligand is the primary molecule
            new_record = OERecord(record)

            new_record.set_value(Fields.title, complex_title)
            new_record.set_value(Fields.ligand, ligand)
            new_record.set_value(Fields.protein, protein)

            # Check Protein Name
            if protein.GetTitle():
                protein_name = protein.GetTitle()
            else:
                protein_name = "prot"

            new_record.set_value(Fields.protein_name, protein_name)
            new_record.set_value(Fields.md_components, mdcomp)

            self.success.emit(new_record)

    except Exception as e:
        print("Failed to complete", str(e), flush=True)
        self.opt['Logger'].info('Exception {} {}'.format(
            str(e), self.title))
        self.log.error(traceback.format_exc())
        self.failure.emit(record)

    return
def process(self, record, port):
    """Convert the MD trajectory of the record into trajectory OEMols.

    The protein, ligand and water trajectories are extracted from the last
    stage trajectory, stored in a new OETraj sub-record on the incoming
    record, and 'OETraj' is added to the list of analyses done. The record
    is emitted on the success port; on any exception the traceback is
    logged and the record is emitted on the failure port.

    Parameters
    ----------
    record: OERecord
        The record carrying the MD data, the MD components and the ligand
    port: String
        The input port name; not used by this method
    """
    try:
        # A local copy of the option dictionary is required to avoid
        # filename collisions between the parallel cube processes
        opt = dict(self.opt)
        logger = opt['Logger']

        # The MD Record API is used through this wrapper
        mdrecord = MDDataRecord(record)

        logger.info(' ')

        system_title = mdrecord.get_title

        logger.info('{}: Attempting MD Traj conversion into OEMols'.format(
            system_title))

        traj_fn = mdrecord.get_stage_trajectory()

        logger.info('{} Temp Directory: {}'.format(system_title,
                                                   os.path.dirname(traj_fn)))
        logger.info('{} Trajectory filename: {}'.format(system_title, traj_fn))

        # Multi-conformer protein and ligand OEMols are built from the trajectory
        logger.info('{} Generating protein and ligand trajectory OEMols'.format(
            system_title))

        flask = mdrecord.get_flask

        md_components = record.get_value(Fields.md_components)

        # The ligand in the MD components and the ligand stored on the
        # record must share the same SMILES string
        smi_lig_comp = oechem.OECreateSmiString(md_components.get_ligand)
        smi_lig_ref = oechem.OECreateSmiString(record.get_value(Fields.ligand))

        if smi_lig_ref != smi_lig_comp:
            raise ValueError(
                "Ligand Isomeric Smiles String check failure: {} vs {}".
                format(smi_lig_comp, smi_lig_ref))

        ptraj, ltraj, wtraj = utl.extract_aligned_prot_lig_wat_traj(
            md_components,
            flask,
            traj_fn,
            opt,
            water_cutoff=opt['water_cutoff'])

        ltraj.SetTitle(record.get_value(Fields.ligand_name))
        ptraj.SetTitle(record.get_value(Fields.protein_name))

        logger.info('{} #atoms, #confs in protein traj OEMol: {}, {}'.format(
            system_title, ptraj.NumAtoms(), ptraj.NumConfs()))
        logger.info('{} #atoms, #confs in ligand traj OEMol: {}, {}'.format(
            system_title, ltraj.NumAtoms(), ltraj.NumConfs()))
        logger.info('{} #atoms, #confs in water traj OEMol: {}, {}'.format(
            system_title, wtraj.NumAtoms(), wtraj.NumConfs()))

        # The OETraj results live in their own record
        oetrajRecord = OERecord()
        oetrajRecord.set_value(OEField('LigTraj', Types.Chem.Mol), ltraj)

        if wtraj:
            oetrajRecord.set_value(OEField('WatTraj', Types.Chem.Mol), wtraj)

        if in_orion():
            oetrajRecord.set_value(Fields.collection, mdrecord.collection_id)

        mdrecord_traj = MDDataRecord(oetrajRecord)
        mdrecord_traj.set_protein_traj(ptraj, shard_name="ProteinTrajConfs_")

        record.set_value(Fields.Analysis.oetraj_rec, oetrajRecord)

        # Extend the list of analyses already done, or start a new one
        if record.has_value(Fields.Analysis.analysesDone):
            analysesDone = utl.RequestOEFieldType(
                record, Fields.Analysis.analysesDone)
            analysesDone.append('OETraj')
        else:
            analysesDone = ['OETraj']

        record.set_value(Fields.Analysis.analysesDone, analysesDone)

        logger.info('{}: saved protein, ligand and water traj OEMols'.format(
            system_title))

        self.success.emit(record)

        # The MD record wrappers are dropped explicitly once the record is out
        del mdrecord
        del mdrecord_traj

    except Exception as e:
        print("Failed to complete", str(e), flush=True)
        self.log.error(traceback.format_exc())
        # Return failed mol
        self.failure.emit(record)

    return
def end(self):
    """Gather the conformer records of each ligand into one ligand record.

    For every entry of ``self.lig_sys_ids`` the conformer records are sorted
    by conformer id and stored as a list on a new record. The fields of the
    initial ligand record, kept on the first conformer, are copied to the
    top level together with protein, ligand id, MD components and (in Orion)
    the collection id. The conformer ligands are merged into one
    multi-conformer ligand, and the new record is emitted on the success
    port. Entries with no conformer records, or whose new record has no
    primary molecule, are reported and skipped.

    On any exception the error is logged and the first conformer record of
    the entry being handled is emitted on the failure port; the remaining
    entries are not handled.
    """
    try:
        for sys_id, list_conf_rec in self.lig_sys_ids.items():

            # catch case where for some reason the conf list list_conf_rec is empty
            if len(list_conf_rec) < 1:
                print('{} does not have any conformer data'.format(sys_id))
                continue

            # Save the first record to emit in failure cases. The whole
            # list used to be stored here, so a list instead of a record
            # ended up on the failure port
            self.record = list_conf_rec[0]

            if len(list_conf_rec) > 1:
                # Conformers for each ligand are sorted based on their confid in each ligand record
                list_conf_rec.sort(
                    key=lambda x: x.get_value(Fields.confid))

            new_rec = OERecord()
            new_rec.set_value(Fields.Analysis.oetrajconf_rec, list_conf_rec)

            # Get the first conf to move some general ligand data up to the top level
            rec0 = list_conf_rec[0]

            # copy all the initial fields in Fields.ligInit_rec up to the top level
            init_rec = rec0.get_value(Fields.ligInit_rec)

            # TODO METADATA IS NOT COPIED?
            for field in init_rec.get_fields():
                new_rec.set_value(field, init_rec.get_value(field))

            # next, fields that will simply be copied and not further used here
            protein = rec0.get_value(Fields.protein)
            new_rec.set_value(Fields.protein, protein)

            ligid = rec0.get_value(Fields.ligid)
            new_rec.set_value(Fields.ligid, ligid)

            if in_orion():
                collection_id = rec0.get_value(Fields.collection)
                new_rec.set_value(Fields.collection, collection_id)

            # finally, fields that will be copied and also further used here
            lig_multi_conf = oechem.OEMol(rec0.get_value(Fields.ligand))
            protein_name = rec0.get_value(Fields.protein_name)

            # MD Components copied at the ligand top level
            new_rec.set_value(Fields.md_components,
                              rec0.get_value(Fields.md_components))

            # if >1 confs, add their confs to the parent ligand at the top level
            for rec in list_conf_rec[1:]:
                lig_multi_conf.NewConf(rec.get_value(Fields.ligand))

            # get name of initial molecule
            if new_rec.has_value(OEPrimaryMolField()):
                init_mol = new_rec.get_value(OEPrimaryMolField())
            else:
                print(
                    '{} ConformerGatheringData: new_rec cannot find the OEPrimaryMolField'
                    .format(sys_id))
                continue

            lig_title = init_mol.GetTitle()
            lig_multi_conf.SetTitle(lig_title)

            # regenerate protein-ligand title since all titles on conformers include conformer id
            title = 'p' + protein_name + '_l' + lig_title

            # set other fields on the new record
            new_rec.set_value(Fields.title, title)
            new_rec.set_value(Fields.ligand, lig_multi_conf)
            new_rec.set_value(Fields.primary_molecule, lig_multi_conf)
            new_rec.set_value(Fields.protein_name, protein_name)
            new_rec.set_value(Fields.ligand_name, lig_title)

            self.success.emit(new_rec)

    except Exception as e:
        print("Failed to complete", str(e), flush=True)
        self.opt['Logger'].info('Exception {} {}'.format(
            str(e), self.title))
        self.log.error(traceback.format_exc())
        self.failure.emit(self.record)
def process(self, record, port):
    """Merge the per-conformer trajectory OEMols into per-ligand ones.

    The ligand, water and protein trajectory OEMols found in the OETraj
    record of every conformer record are concatenated, conformer after
    conformer, into one composite OEMol each. The conformer id of every
    ligand trajectory frame is stored on the record as the pose id vector,
    and a new OETraj record holding the composite trajectories is set on the
    record, which is emitted on the success port. On any exception the error
    is logged and the record is emitted on the failure port.

    Parameters
    ----------
    record: OERecord
        The ligand record carrying the list of conformer records
    port: String
        The input port name; not used by this method
    """
    # Both names are used by the exception handler below, so they must be
    # bound even when the failure happens before they are set in the try
    # body; otherwise the handler itself raised and nothing was emitted
    opt = None
    system_title = 'UNKNOWN'

    try:
        # The copy of the dictionary option as local variable
        # is necessary to avoid filename collisions due to
        # the parallel cube processes
        opt = dict(self.opt)

        # Logger string
        opt['Logger'].info(' Beginning ConfTrajsToLigTraj')
        system_title = utl.RequestOEFieldType(record, Fields.title)
        opt['Logger'].info(
            '{} Attempting to combine conf traj OEMols into ligand traj OEMol'
            .format(system_title))

        # Go find the ligand and LigTraj fields in each of the conformer records
        if not record.has_field(Fields.Analysis.oetrajconf_rec):
            raise ValueError(
                '{} could not find the conformer record'.format(
                    system_title))
        else:
            opt['Logger'].info(
                '{} found the conformer record'.format(system_title))

        # set up ligand and LigTraj lists then loop over conformer records
        poseIdVec = []
        ligTrajConfs = []
        protTrajConfs = []
        watTrajConfs = []

        list_conf_rec = record.get_value(Fields.Analysis.oetrajconf_rec)

        for confrec in list_conf_rec:
            confid = utl.RequestOEFieldType(confrec, Fields.confid)

            if not confrec.has_field(Fields.Analysis.oetraj_rec):
                raise ValueError(
                    '{} confID {}: could not find traj record'.format(
                        system_title, confid))

            oetrajRecord = confrec.get_value(Fields.Analysis.oetraj_rec)

            # Extract the ligand traj OEMol from the OETraj record
            ligTraj = utl.RequestOEField(oetrajRecord, 'LigTraj',
                                         Types.Chem.Mol)
            # One pose id entry per ligand trajectory frame
            poseIdVec += [confid] * ligTraj.NumConfs()
            ligTrajConfs.append(ligTraj)
            opt['Logger'].info(
                '{} confID {}: adding ligTraj with {} atoms, {} confs'.
                format(system_title, confid, ligTraj.NumAtoms(),
                       ligTraj.NumConfs()))

            # Extract the activeSite water traj OEMol from the OETraj record
            watTraj = utl.RequestOEField(oetrajRecord, 'WatTraj',
                                         Types.Chem.Mol)
            watTrajConfs.append(watTraj)
            opt['Logger'].info(
                '{} confID {}: adding watTraj with {} atoms, {} confs'.
                format(system_title, confid, watTraj.NumAtoms(),
                       watTraj.NumConfs()))

            # Extract the protTraj OEMol from the OETraj record
            mdtrajrecord = MDDataRecord(oetrajRecord)
            protTraj = mdtrajrecord.get_protein_traj
            protTrajConfs.append(protTraj)
            opt['Logger'].info(
                '{} confID {}: adding protTraj with {} atoms, {} confs'.
                format(system_title, confid, protTraj.NumAtoms(),
                       protTraj.NumConfs()))
            del mdtrajrecord

        if len(ligTrajConfs) < 1 or len(protTrajConfs) < 1:
            raise ValueError(
                '{} empty list of lig or protein trajectory OEMols'.format(
                    system_title))

        # The composite ligand trajectory starts as a copy of the first one
        ligTraj = oechem.OEMol(ligTrajConfs[0])
        xyz = oechem.OEFloatArray(3 * ligTraj.GetMaxAtomIdx())
        for trajMol in ligTrajConfs[1:]:
            for conf in trajMol.GetConfs():
                conf.GetCoords(xyz)
                ligTraj.NewConf(xyz)
        opt['Logger'].info(
            '{} composite ligTraj has {} atoms, {} confs'.format(
                system_title, ligTraj.NumAtoms(), ligTraj.NumConfs()))

        watTraj = oechem.OEMol(watTrajConfs[0])
        xyz = oechem.OEFloatArray(3 * watTraj.GetMaxAtomIdx())
        for trajMol in watTrajConfs[1:]:
            for conf in trajMol.GetConfs():
                conf.GetCoords(xyz)
                watTraj.NewConf(xyz)
        opt['Logger'].info(
            '{} composite watTraj has {} atoms, {} confs'.format(
                system_title, watTraj.NumAtoms(), watTraj.NumConfs()))

        # NOTE(review): unlike the ligand and water trajectories, the first
        # protein trajectory is extended in place rather than copied -
        # confirm this is intended
        protTraj = protTrajConfs[0]
        xyz = oechem.OEFloatArray(3 * protTraj.GetMaxAtomIdx())
        for trajMol in protTrajConfs[1:]:
            for conf in trajMol.GetConfs():
                conf.GetCoords(xyz)
                protTraj.NewConf(xyz)
        opt['Logger'].info(
            '{} composite protTraj has {} atoms, {} confs'.format(
                system_title, protTraj.NumAtoms(), protTraj.NumConfs()))

        record.set_value(Fields.Analysis.poseIdVec, poseIdVec)

        # Create new record with OETraj results
        oetrajRecord = OERecord()
        oetrajRecord.set_value(OEField('LigTraj', Types.Chem.Mol), ligTraj)

        if watTraj:
            oetrajRecord.set_value(OEField('WatTraj', Types.Chem.Mol),
                                   watTraj)

        if in_orion():
            collection_id = utl.RequestOEFieldType(record, Fields.collection)
            oetrajRecord.set_value(Fields.collection, collection_id)

        mdrecord_traj = MDDataRecord(oetrajRecord)
        mdrecord_traj.set_protein_traj(protTraj,
                                       shard_name="ProteinTrajConfs_")

        record.set_value(Fields.Analysis.oetraj_rec, oetrajRecord)

        self.success.emit(record)

    except Exception as e:
        print("Failed to complete", str(e), flush=True)

        # Fall back to the cube logger when the option copy is unavailable
        logger = opt.get('Logger') if opt else None
        if logger is None:
            logger = self.log

        logger.info(
            'Exception {} in ConfTrajsToLigTraj on {}'.format(
                str(e), system_title))
        self.log.error(traceback.format_exc())
        # Return failed mol
        self.failure.emit(record)

    return