Example #1
0
def RequestOEField(record, field, rType):
    """Return the value stored on ``record`` for the field ``field`` of type ``rType``.

    Parameters
    ----------
    record:
        The record to read from; it must provide ``has_value`` and
        ``get_value``.
    field: String
        The field name
    rType:
        The field type handed to ``OEField``

    Raises
    ------
    ValueError
        If the record holds no value for the requested field. A message is
        printed to stdout before the exception is raised.
    """
    requested = OEField(field, rType)

    if record.has_value(requested):
        return record.get_value(requested)

    print('Missing record field {}'.format(field))
    raise ValueError('The record does not have field {}'.format(field))
Example #2
0
    # Field declarations used by the free energy calculations. Names such as
    # OEField, OEFieldMeta, Types, Meta and _metaHidden are defined outside
    # this class.
    class FEC:
        # Free energy value, stored as a float and annotated in kcal/mol
        free_energy = OEField('FE_OPLMD',
                              Types.Float,
                              meta=OEFieldMeta().set_option(
                                  Meta.Units.Energy.kCal_per_mol))

        # Error of the free energy: same unit as free_energy and linked to it
        # through the ErrorsFor relation. The metadata object has to be built
        # and related before the error field is declared.
        metaFreeEnergy_err = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaFreeEnergy_err.add_relation(Meta.Relations.ErrorsFor, free_energy)
        free_energy_err = OEField('FE_Error_OPLMD',
                                  Types.Float,
                                  meta=metaFreeEnergy_err)

        # Fields specific to the relative binding free energy calculations
        class RBFEC:
            # Oriented Edge field for relative free energy calculations
            # The first integer of the list is the ligand ID of the starting
            # thermodynamic state and the second the final one
            # NOTE(review): the comment above describes a list of two
            # integers, but the field is declared as a single Types.Int.
            # Confirm which one is intended.
            edgeid = OEField("EdgeID_OPLMD", Types.Int, meta=_metaHidden)
            edge_name = OEField("EdgeName_OPLMD", Types.String)

            # The Thermodynamics leg type is used for Bound and
            # UnBound State run identification
            thd_leg_type = OEField("Thd_Leg_OPLMD",
                                   Types.String,
                                   meta=_metaHidden)

            # Fields used by the NESC calculations
            # NOTE(review): NESC presumably stands for Non-Equilibrium
            # Switching Calculations; confirm against the project docs.
            class NESC:

                # Records holding the two end states A and B
                state_A = OEField("StateA_OPLMD", Types.Record)
                state_B = OEField("StateB_OPLMD", Types.Record)

                # Gromacs topology and coordinate data, stored as strings
                # and declared with the _metaHidden metadata
                gmx_top = OEField("GMX_Top_OPLMD",
                                  Types.String,
                                  meta=_metaHidden)
                gmx_gro = OEField("GMX_Gro_OPLMD",
                                  Types.String,
                                  meta=_metaHidden)
                # Work value, annotated in kJ/mol (the free energy fields
                # of FEC use kcal/mol instead)
                work = OEField("GMX_Work_OPLMD",
                               Types.Float,
                               meta=OEFieldMeta().set_option(
                                   Meta.Units.Energy.kJ_per_mol))
                # Integer frame counter. The field name has no _OPLMD suffix,
                # unlike the other fields declared in this class.
                frame_count = OEField("frame_count",
                                      Types.Int,
                                      meta=_metaHidden)

                # The Work record is used to collect the data related to the
                # Work Forward and Reverse for the Bound and Unbound States
                work_rec = OEField("Work_Record_OPLMD", Types.Record)

                # The Relative Binding Affinity record collects data for the
                # different analysis methods used to compute it
                DDG_rec = OEField("DDG_Record_OPLMD", Types.Record)
Example #3
0
            # Fields used by the NESC calculations. OEField, OEFieldMeta,
            # Types, Meta and _metaHidden are defined outside this class.
            # NOTE(review): NESC presumably stands for Non-Equilibrium
            # Switching Calculations; confirm against the project docs.
            class NESC:

                # Records holding the two end states A and B
                state_A = OEField("StateA_OPLMD", Types.Record)
                state_B = OEField("StateB_OPLMD", Types.Record)

                # Gromacs topology and coordinate data, stored as strings
                # and declared with the _metaHidden metadata
                gmx_top = OEField("GMX_Top_OPLMD",
                                  Types.String,
                                  meta=_metaHidden)
                gmx_gro = OEField("GMX_Gro_OPLMD",
                                  Types.String,
                                  meta=_metaHidden)
                # Work value, stored as a float and annotated in kJ/mol
                work = OEField("GMX_Work_OPLMD",
                               Types.Float,
                               meta=OEFieldMeta().set_option(
                                   Meta.Units.Energy.kJ_per_mol))
                # Integer frame counter. The field name has no _OPLMD suffix,
                # unlike the other fields declared in this class.
                frame_count = OEField("frame_count",
                                      Types.Int,
                                      meta=_metaHidden)

                # The Work record is used to collect the data related to the
                # Work Forward and Reverse for the Bound and Unbound States
                work_rec = OEField("Work_Record_OPLMD", Types.Record)

                # The Relative Binding Affinity record collects data for the
                # different analysis methods used to compute it
                DDG_rec = OEField("DDG_Record_OPLMD", Types.Record)
Example #4
0
    def test_protein_traj(self):
        # The protein trajectory exposed by the MD record API must have the
        # same number of atoms as the molecule read directly from the
        # OETraj record
        traj_field = OEField('OETraj', Types.Record)
        traj_rec = self.record.get_value(traj_field)

        expected_mol = traj_rec.get_value(Fields.protein_traj_confs)

        md_traj = MDDataRecord(traj_rec)
        observed_mol = md_traj.get_protein_traj

        self.assertEqual(expected_mol.NumAtoms(), observed_mol.NumAtoms())
Example #5
0
    def test_set_protein_traj(self):
        # Keep a handle on the protein conformers before removing them from
        # the OETraj record
        traj_rec = self.record.get_value(OEField('OETraj', Types.Record))
        protein = traj_rec.get_value(Fields.protein_traj_confs)
        md_traj = MDDataRecord(traj_rec)

        traj_rec.delete_field(Fields.protein_traj_confs)

        # Without the field, reading the protein trajectory has to fail
        self.assertRaises(ValueError, getattr, md_traj, 'get_protein_traj')

        # Setting the protein trajectory again must report success
        was_set = md_traj.set_protein_traj(protein)
        self.assertTrue(was_set)
Example #6
0
class Fields:
    # --- MD progress counters ---
    # MD steps performed so far
    current_iteration_field = OEField("Current_Iterations_OMD", Types.Int)

    # MD steps to be performed in total
    md_nsteps_field = OEField("MD_nsteps_OMD", Types.Int)

    # Cycles performed so far
    cycle_id = OEField("Cycle_ID_OMD", Types.Int)

    # --- Run data ---
    # Binary content of the tpr file
    tpr_field = OEField("TPR_bytes_OMD", Types.Blob, meta=_metaHidden)

    # Prefix name
    prefix_name_field = OEField("Prefix_OPLMD", Types.String)

    # The trajectory and restart fields are strings for local runs and
    # integers when running in Orion
    if not in_orion():
        trajectory = OEField("GMXTrajectory_OMD", Types.String, meta=_metaHidden)
        gmx_restart = OEField("GMXRestart_OMD", Types.String, meta=_metaHidden)
    else:
        trajectory = OEField("GMXTrajectory_OMD", Types.Int, meta=_metaHidden)
        gmx_restart = OEField("GMXRestart_OMD", Types.Int, meta=_metaHidden)
Example #7
0
def data_trajectory_extraction(ctx, name, only):
    """
    Download the MD data referenced by the records and write the updated
    records to an output file.

    For each record in ``ctx.obj['records']`` a copy is made; the selected
    data referenced by id (stage data, trajectories, Parmed structure,
    protein trajectory conformers) is downloaded and the id fields are
    replaced by local file names or by the loaded objects. The collection
    field is removed and the copy is written to the output in binary format.

    Parameters
    ----------
    ctx:
        The context object; ``ctx.obj`` must provide the ``'session'`` and
        ``'records'`` entries
    name: String
        The output file name
    only: Iterable of String
        The data to extract: ``'a'`` (everything), ``'stages'``, ``'parmed'``
        and/or ``'protein_confs'``

    Raises
    ------
    ValueError
        If ``only`` contains an unrecognized value or if a record does not
        have the collection field
    """

    check_only = ['a', 'stages', 'parmed', 'protein_confs']

    for v in only:
        if v not in check_only:
            raise ValueError(
                "The only keyword value is not recognized {}. Option available: {}"
                .format(only, check_only[1:]))

    extract_all = 'a' in only

    # Fields holding the ids that are replaced by downloaded data
    mddata_id_field = OEField("MDData_OPLMD", Types.Int)
    trj_id_field = OEField("Trajectory_OPLMD", Types.Int)
    parmed_id_field = OEField('Structure_Parmed_OPLMD', Types.Int)
    oetraj_field = OEField('OETraj', Types.Record)
    prot_traj_id_field = OEField("ProtTraj_OPLMD", Types.Int)

    session = ctx.obj['session']

    ofs = oechem.oeofstream(name)

    # The output stream is closed even if a record fails to be processed
    try:
        for record in tqdm(ctx.obj['records']):

            new_record = OERecord(record)

            if not record.has_field(Fields.collection):
                raise ValueError(
                    "No Collection field has been found in the record")

            collection_id = record.get_value(Fields.collection)

            collection = session.get_resource(ShardCollection, collection_id)

            if extract_all or 'stages' in only:

                mdrecord = MDDataRecord(record)

                stages = mdrecord.get_stages

                system_title = mdrecord.get_title
                sys_id = mdrecord.get_flask_id

                new_stages = []

                for stage in stages:

                    stg_type = stage.get_value(Fields.stage_type)
                    new_stage = OERecord(stage)

                    with TemporaryDirectory() as output_directory:
                        # Only the temporary directory base name is used, as
                        # a prefix of the file names; the files themselves
                        # are not written inside the temporary directory
                        prefix = os.path.basename(
                            output_directory) + '_' + system_title + '_' + str(
                                sys_id) + '-' + stg_type

                        data_fn = prefix + '.tar.gz'
                        shard_id = stage.get_value(mddata_id_field)

                        shard = session.get_resource(
                            Shard(collection=collection), shard_id)
                        shard.download_to_file(data_fn)

                        new_stage.delete_field(mddata_id_field)
                        new_stage.set_value(Fields.mddata, data_fn)

                        if stage.has_field(trj_id_field):

                            trj_field = stage.get_field("Trajectory_OPLMD")

                            # The MD engine is read from the description
                            # attribute of the trajectory field metadata
                            md_engine = trj_field.get_meta().get_attribute(
                                Meta.Annotation.Description)

                            trj_id = stage.get_value(trj_field)
                            trj_fn = prefix + '_traj' + '.tar.gz'

                            resource = session.get_resource(File, trj_id)
                            resource.download_to_file(trj_fn)

                            trj_meta = OEFieldMeta()
                            trj_meta.set_attribute(Meta.Annotation.Description,
                                                   md_engine)
                            new_trj_field = OEField(
                                Fields.trajectory.get_name(),
                                Fields.trajectory.get_type(),
                                meta=trj_meta)

                            new_stage.delete_field(trj_id_field)
                            new_stage.set_value(new_trj_field, trj_fn)

                    new_stages.append(new_stage)

                new_record.set_value(Fields.md_stages, new_stages)

            if extract_all or 'parmed' in only:
                if record.has_field(parmed_id_field):
                    pmd_id = record.get_value(parmed_id_field)
                    shard = session.get_resource(Shard(collection=collection),
                                                 pmd_id)

                    with TemporaryDirectory() as output_directory:
                        parmed_fn = os.path.join(output_directory,
                                                 "parmed.pickle")

                        shard.download_to_file(parmed_fn)

                        # NOTE: unpickling can execute arbitrary code; the
                        # downloaded shard content must come from a trusted
                        # source
                        with open(parmed_fn, 'rb') as f:
                            parm_dic = pickle.load(f)

                        pmd_structure = parmed.structure.Structure()
                        pmd_structure.__setstate__(parm_dic)

                    new_record.delete_field(parmed_id_field)
                    new_record.set_value(Fields.pmd_structure, pmd_structure)

            if extract_all or 'protein_confs' in only:
                if record.has_field(oetraj_field):

                    oetrajrec = record.get_value(oetraj_field)

                    prot_conf_id = oetrajrec.get_value(prot_traj_id_field)

                    shard = session.get_resource(Shard(collection=collection),
                                                 prot_conf_id)

                    with TemporaryDirectory() as output_directory:
                        protein_fn = os.path.join(output_directory,
                                                  "prot_traj_confs.oeb")

                        shard.download_to_file(protein_fn)

                        protein_conf = oechem.OEMol()

                        with oechem.oemolistream(protein_fn) as ifs:
                            oechem.OEReadMolecule(ifs, protein_conf)

                    oetrajrec.delete_field(prot_traj_id_field)
                    oetrajrec.set_value(Fields.protein_traj_confs,
                                        protein_conf)

                    new_record.set_value(oetraj_field, oetrajrec)

            new_record.delete_field(Fields.collection)

            OEWriteRecord(ofs, new_record, fmt='binary')
    finally:
        ofs.close()
Example #8
0
    def add_new_stage(self,
                      stage_name,
                      stage_type,
                      topology,
                      mdstate,
                      data_fn,
                      append=True,
                      log=None,
                      trajectory_fn=None,
                      trajectory_engine=None,
                      trajectory_orion_ui='OrionFile'):
        """
        Add a new MD stage to the MD stages stored on the record.

        The topology and the MD state are packed into the ``data_fn``
        tar.gz archive, which is then uploaded together with the optional
        trajectory file. The new stage is either appended to the MD stages
        or replaces the last one.

        Parameters
        ----------
        stage_name: String
            The name of the new MD stage
        stage_type: String
            The type of the new MD stage e.g. SETUP, MINIMIZATION etc.
        topology: OEMol
            The topology
        mdstate: MDState
            The new MD state made of positions, velocities and box vectors
        data_fn: String
            The name of the archive file holding the MD data linked to the
            stage. The name is used only locally; in Orion it is not used
        append: Bool
            If True the stage is appended to the MD stages, otherwise the
            last stage is overwritten by the new one
        log: String or None
            Log info
        trajectory_fn: String, Int or None
            The trajectory file name for local runs or the id in Orion
            associated with the new MD stage
        trajectory_engine: String or None
            The MD engine used to generate the new MD stage.
            Possible names: OpenMM or Gromacs
        trajectory_orion_ui: String
            The trajectory name to be displayed in the Orion UI

        Returns
        -------
        boolean: Bool
            True if the MD stage has been successfully created

        Raises
        ------
        IOError
            If the trajectory file name is given but the file is not found
        ValueError
            If the stage name is already used by an MD stage that is not
            going to be overwritten
        """

        new_stage = OERecord()
        new_stage.set_value(Fields.stage_name, stage_name)
        new_stage.set_value(Fields.stage_type, stage_type)

        if log is not None:
            new_stage.set_value(Fields.log_data, log)

        # Topology and state are first written to a temporary directory and
        # then packed into the data archive
        with TemporaryDirectory() as tmp_dir:

            top_fn = os.path.join(tmp_dir, MDFileNames.topology)
            state_fn = os.path.join(tmp_dir, MDFileNames.state)

            with oechem.oemolostream(top_fn) as ofs:
                oechem.OEWriteConstMolecule(ofs, topology)

            with open(state_fn, 'wb') as f:
                pickle.dump(mdstate, f)

            with tarfile.open(data_fn, mode='w:gz') as archive:
                for fn in (top_fn, state_fn):
                    archive.add(fn, arcname=os.path.basename(fn))

        has_trajectory = trajectory_fn is not None

        if has_trajectory:

            if not os.path.isfile(trajectory_fn):
                raise IOError(
                    "The trajectory file has not been found: {}".format(
                        trajectory_fn))

            # The MD engine is stored as description of the trajectory field
            trj_meta = OEFieldMeta()
            trj_meta.set_attribute(Meta.Annotation.Description,
                                   trajectory_engine)
            trj_field = OEField(Fields.trajectory.get_name(),
                                Fields.trajectory.get_type(),
                                meta=trj_meta)

        has_stages = self.rec.has_field(Fields.md_stages)

        if has_stages:

            stage_names = self.get_stages_names

            # When overwriting, the new name may only match the name of the
            # last stage, which is the one going to be replaced
            if stage_name in stage_names:
                if append or stage_name != stage_names[-1]:
                    raise ValueError(
                        "The selected stage name is already present in the MD stages: {}"
                        .format(stage_names))

        lf = utils.upload_data(data_fn,
                               collection_id=self.collection_id,
                               shard_name=data_fn)

        new_stage.set_value(Fields.mddata, lf)

        if has_trajectory:
            lft = utils.upload_file(trajectory_fn,
                                    orion_ui_name=trajectory_orion_ui)
            new_stage.set_value(trj_field, lft)

        if has_stages:
            stages = self.get_stages

            if append:
                stages.append(new_stage)
            else:
                self.delete_stage_by_name('last')
                stages[-1] = new_stage
        else:
            stages = [new_stage]

        self.rec.set_value(Fields.md_stages, stages)

        self.processed[stage_name] = False

        return True
Example #9
0
    def process(self, record, port):
        """
        Convert the MD stage trajectory of the record into protein, ligand
        and water trajectory OEMols.

        The ligand and water trajectories and the protein trajectory are
        saved on a new OETraj record, which is set on the input record, and
        'OETraj' is added to the list of the analyses done. The record is
        emitted on the success port, or on the failure port if any
        exception is raised.
        """
        try:
            # Each parallel cube process works on its own copy of the
            # option dictionary to avoid filename collisions
            opt = dict(self.opt)

            # The MD Record API is used through the MD record
            mdrecord = MDDataRecord(record)

            logger = opt['Logger']

            logger.info(' ')
            system_title = mdrecord.get_title
            logger.info(
                '{}: Attempting MD Traj conversion into OEMols'.format(
                    system_title))

            traj_fn = mdrecord.get_stage_trajectory()

            logger.info('{} Temp Directory: {}'.format(
                system_title, os.path.dirname(traj_fn)))
            logger.info('{} Trajectory filename: {}'.format(
                system_title, traj_fn))

            # Multi-conformer protein and ligand OEMols are generated
            # from the trajectory
            logger.info(
                '{} Generating protein and ligand trajectory OEMols'.format(
                    system_title))

            flask = mdrecord.get_flask
            md_components = record.get_value(Fields.md_components)

            # The ligand of the MD components must match the reference
            # ligand of the record: their Smiles strings are compared
            lig_comp = md_components.get_ligand
            lig_ref = record.get_value(Fields.ligand)

            smi_lig_comp = oechem.OECreateSmiString(lig_comp)
            smi_lig_ref = oechem.OECreateSmiString(lig_ref)

            if smi_lig_comp != smi_lig_ref:
                raise ValueError(
                    "Ligand Isomeric Smiles String check failure: {} vs {}".
                    format(smi_lig_comp, smi_lig_ref))

            ptraj, ltraj, wtraj = utl.extract_aligned_prot_lig_wat_traj(
                md_components,
                flask,
                traj_fn,
                opt,
                water_cutoff=opt['water_cutoff'])

            ltraj.SetTitle(record.get_value(Fields.ligand_name))
            ptraj.SetTitle(record.get_value(Fields.protein_name))

            logger.info(
                '{} #atoms, #confs in protein traj OEMol: {}, {}'.format(
                    system_title, ptraj.NumAtoms(), ptraj.NumConfs()))
            logger.info(
                '{} #atoms, #confs in ligand traj OEMol: {}, {}'.format(
                    system_title, ltraj.NumAtoms(), ltraj.NumConfs()))
            logger.info(
                '{} #atoms, #confs in water traj OEMol: {}, {}'.format(
                    system_title, wtraj.NumAtoms(), wtraj.NumConfs()))

            # The OETraj results are collected in a new record
            oetrajRecord = OERecord()
            oetrajRecord.set_value(OEField('LigTraj', Types.Chem.Mol), ltraj)

            # The water trajectory is saved only if it evaluates to True
            if wtraj:
                oetrajRecord.set_value(OEField('WatTraj', Types.Chem.Mol),
                                       wtraj)

            if in_orion():
                oetrajRecord.set_value(Fields.collection,
                                       mdrecord.collection_id)

            # The protein trajectory is saved through the MD Record API
            mdrecord_traj = MDDataRecord(oetrajRecord)
            mdrecord_traj.set_protein_traj(ptraj,
                                           shard_name="ProteinTrajConfs_")

            record.set_value(Fields.Analysis.oetraj_rec, oetrajRecord)

            # The list of the analyses done is extended if already present
            # on the record, otherwise it is started here
            analysesDone = []
            if record.has_value(Fields.Analysis.analysesDone):
                analysesDone = utl.RequestOEFieldType(
                    record, Fields.Analysis.analysesDone)
            analysesDone.append('OETraj')

            record.set_value(Fields.Analysis.analysesDone, analysesDone)

            logger.info(
                '{}: saved protein, ligand  and water traj OEMols'.format(
                    system_title))

            self.success.emit(record)

            del mdrecord
            del mdrecord_traj

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            self.log.error(traceback.format_exc())
            # The failed record is sent to the failure port
            self.failure.emit(record)
Example #10
0
    def process(self, record, port):
        """
        Combine the trajectory OEMols of the conformer records into
        composite ligand, water and protein trajectory OEMols.

        For each conformer record the ligand, water and protein
        trajectories are read from its OETraj record. The conformers of all
        the trajectories are merged, in the conformer record order, into
        one multi-conformer OEMol per component. The composite trajectories
        are saved on a new OETraj record set on the input record, together
        with the vector mapping each trajectory frame to its conformer id.
        The record is emitted on the success port, or on the failure port
        if any exception is raised.
        """

        def _append_confs(composite, other_trajs):
            # Add each conformer of other_trajs to composite as new conformer
            xyz = oechem.OEFloatArray(3 * composite.GetMaxAtomIdx())
            for trajMol in other_trajs:
                for conf in trajMol.GetConfs():
                    conf.GetCoords(xyz)
                    composite.NewConf(xyz)

        # The title is bound before the try block so that the exception
        # handler can always use it, even if the failure happens before
        # the title is read from the record
        system_title = 'UNKNOWN'

        try:
            # The copy of the dictionary option as local variable
            # is necessary to avoid filename collisions due to
            # the parallel cube processes
            opt = dict(self.opt)

            # Logger string
            opt['Logger'].info(' Beginning ConfTrajsToLigTraj')
            system_title = utl.RequestOEFieldType(record, Fields.title)
            opt['Logger'].info(
                '{} Attempting to combine conf traj OEMols into ligand traj OEMol'
                .format(system_title))

            # Go find the ligand and LigTraj fields in each of the conformer records
            if not record.has_field(Fields.Analysis.oetrajconf_rec):
                raise ValueError(
                    '{} could not find the conformer record'.format(
                        system_title))

            opt['Logger'].info(
                '{} found the conformer record'.format(system_title))

            # set up ligand and LigTraj lists then loop over conformer records
            poseIdVec = []
            ligTrajConfs = []
            protTrajConfs = []
            watTrajConfs = []
            list_conf_rec = record.get_value(Fields.Analysis.oetrajconf_rec)
            for confrec in list_conf_rec:
                confid = utl.RequestOEFieldType(confrec, Fields.confid)

                if not confrec.has_field(Fields.Analysis.oetraj_rec):
                    raise ValueError(
                        '{} confID {}: could not find traj record'.format(
                            system_title, confid))
                oetrajRecord = confrec.get_value(Fields.Analysis.oetraj_rec)

                # Extract the ligand traj OEMol from the OETraj record
                ligTraj = utl.RequestOEField(oetrajRecord, 'LigTraj',
                                             Types.Chem.Mol)
                # Each frame of this trajectory belongs to the conformer
                poseIdVec += [confid] * ligTraj.NumConfs()
                ligTrajConfs.append(ligTraj)
                opt['Logger'].info(
                    '{} confID {}: adding ligTraj with {} atoms, {} confs'.
                    format(system_title, confid, ligTraj.NumAtoms(),
                           ligTraj.NumConfs()))

                # Extract the activeSite water traj OEMol from the OETraj record
                watTraj = utl.RequestOEField(oetrajRecord, 'WatTraj',
                                             Types.Chem.Mol)
                watTrajConfs.append(watTraj)
                opt['Logger'].info(
                    '{} confID {}: adding watTraj with {} atoms, {} confs'.
                    format(system_title, confid, watTraj.NumAtoms(),
                           watTraj.NumConfs()))

                # Extract the protTraj OEMol from the OETraj record
                mdtrajrecord = MDDataRecord(oetrajRecord)
                protTraj = mdtrajrecord.get_protein_traj
                protTrajConfs.append(protTraj)
                opt['Logger'].info(
                    '{} confID {}: adding protTraj with {} atoms, {} confs'.
                    format(system_title, confid, protTraj.NumAtoms(),
                           protTraj.NumConfs()))
                del mdtrajrecord

            if not ligTrajConfs or not protTrajConfs:
                raise ValueError(
                    '{} empty list of lig or protein trajectory OEMols'.format(
                        system_title))

            # The composite ligand and water trajectories start as copies
            # of the first conformer trajectory
            ligTraj = oechem.OEMol(ligTrajConfs[0])
            _append_confs(ligTraj, ligTrajConfs[1:])
            opt['Logger'].info(
                '{} composite ligTraj has {} atoms, {} confs'.format(
                    system_title, ligTraj.NumAtoms(), ligTraj.NumConfs()))

            watTraj = oechem.OEMol(watTrajConfs[0])
            _append_confs(watTraj, watTrajConfs[1:])
            opt['Logger'].info(
                '{} composite watTraj has {} atoms, {} confs'.format(
                    system_title, watTraj.NumAtoms(), watTraj.NumConfs()))

            # The first protein trajectory is extended in place, without
            # making a copy of it
            protTraj = protTrajConfs[0]
            _append_confs(protTraj, protTrajConfs[1:])
            opt['Logger'].info(
                '{} composite protTraj has {} atoms, {} confs'.format(
                    system_title, protTraj.NumAtoms(), protTraj.NumConfs()))

            record.set_value(Fields.Analysis.poseIdVec, poseIdVec)

            # Create new record with OETraj results
            oetrajRecord = OERecord()
            oetrajRecord.set_value(OEField('LigTraj', Types.Chem.Mol), ligTraj)
            if watTraj:
                oetrajRecord.set_value(OEField('WatTraj', Types.Chem.Mol),
                                       watTraj)

            if in_orion():
                collection_id = utl.RequestOEFieldType(record,
                                                       Fields.collection)
                oetrajRecord.set_value(Fields.collection, collection_id)
            mdrecord_traj = MDDataRecord(oetrajRecord)
            mdrecord_traj.set_protein_traj(protTraj,
                                           shard_name="ProteinTrajConfs_")

            record.set_value(Fields.Analysis.oetraj_rec, oetrajRecord)

            self.success.emit(record)

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            # self.opt is used here because the local copy may not have
            # been created if the failure happened early
            self.opt['Logger'].info(
                'Exception {} in ConfTrajsToLigTraj on {}'.format(
                    str(e), system_title))
            self.log.error(traceback.format_exc())
            # Return failed mol
            self.failure.emit(record)

        return
Example #11
0
    def process(self, record, port):
        """
        Compute the interaction energies along the MD trajectory for the
        protein, ligand, complex and water subsystems.

        The ligand, protein and water trajectory OEMols are read from the
        OETraj record, which must have been produced by the OETraj
        analysis. The interaction energies are computed by using the Parmed
        structure synchronized with the last MD stage. Any NaN found in the
        results is replaced by ``magic_big_float_to_replace_NaN``. The
        results are saved on the record and 'TrajIntE' is added to the list
        of the analyses done. The record is emitted on the success port, or
        on the failure port if any exception is raised.
        """

        # The title is bound before the try block so that the exception
        # handler can always use it, even if the failure happens before
        # the title is read from the record
        system_title = 'UNKNOWN'

        try:
            opt = self.opt
            # Logger string
            opt['Logger'].info(' Beginning TrajInteractionEnergyCube')

            mdrecord = MDDataRecord(record)

            system_title = mdrecord.get_title

            opt['Logger'].info(
                '{} Attempting to compute MD Traj protein-ligand Interaction energies'
                .format(system_title))

            # Check that the OETraj analysis has been done
            analysesDone = utl.RequestOEFieldType(record,
                                                  Fields.Analysis.analysesDone)
            if 'OETraj' not in analysesDone:
                raise ValueError(
                    '{} does not have OETraj analyses done'.format(
                        system_title))

            opt['Logger'].info('{} found OETraj analyses'.format(system_title))

            # Extract the relevant traj OEMols from the OETraj record
            oetrajRecord = utl.RequestOEFieldType(record,
                                                  Fields.Analysis.oetraj_rec)
            opt['Logger'].info('{} found OETraj record'.format(system_title))
            ligTraj = utl.RequestOEField(oetrajRecord, 'LigTraj',
                                         Types.Chem.Mol)
            opt['Logger'].info(
                '{} #atoms, #confs in ligand traj OEMol: {}, {}'.format(
                    system_title, ligTraj.NumAtoms(), ligTraj.NumConfs()))

            mdtrajrecord = MDDataRecord(oetrajRecord)
            protTraj = mdtrajrecord.get_protein_traj

            opt['Logger'].info(
                '{} #atoms, #confs in protein traj OEMol: {}, {}'.format(
                    system_title, protTraj.NumAtoms(), protTraj.NumConfs()))

            # NOTE(review): unlike the ligand trajectory, the water
            # trajectory is read without checking that the record has it;
            # confirm that the WatTraj field is always set upstream
            water_traj = oetrajRecord.get_value(
                OEField('WatTraj', Types.Chem.Mol))
            opt['Logger'].info(
                '{} #atoms, #confs in water traj OEMol: {}, {}'.format(
                    system_title, water_traj.NumAtoms(),
                    water_traj.NumConfs()))

            prmed = mdrecord.get_parmed(sync_stage_name='last')

            # Compute interaction energies for the protein, ligand, complex and water subsystems
            intEdata = mmpbsa.ProtLigWatInteractionEFromParmedOETraj(
                prmed, ligTraj, protTraj, water_traj, opt)

            if intEdata is None:
                raise ValueError(
                    '{} Calculation of Interaction Energies failed'.format(
                        system_title))

            # protein and ligand traj OEMols now have parmed charges on them; save these
            oetrajRecord.set_value(OEField('LigTraj', Types.Chem.Mol), ligTraj)
            record.set_value(Fields.Analysis.oetraj_rec, oetrajRecord)

            # list the energy terms in the intEdata dict to be stored on the record
            for key, values in intEdata.items():
                opt['Logger'].info('{} traj intEdata[{}] of length {}'.format(
                    system_title, key, len(values)))
                # change any NaNs to a really big float or else Orion WriterCube fails on JSON dict
                for i, x in enumerate(values):
                    if math.isnan(x):
                        opt['Logger'].info(
                            '{} found a NaN at intEdata[{}][{}]'.format(
                                system_title, key, i))
                        values[i] = magic_big_float_to_replace_NaN

            # Add the intEdata dict to the record
            record.set_value(Fields.Analysis.oeintE_dict, intEdata)

            analysesDone.append('TrajIntE')
            record.set_value(Fields.Analysis.analysesDone, analysesDone)
            opt['Logger'].info(
                '{} finished writing trajIntE OERecord'.format(system_title))

            self.success.emit(record)

            del mdrecord
            del mdtrajrecord

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            # self.opt is used here because the local name may not have
            # been bound if the failure happened early
            self.opt['Logger'].info(
                'Exception {} in TrajInteractionEnergyCube on {}'.format(
                    str(e), system_title))
            self.log.error(traceback.format_exc())
            # Return failed mol
            self.failure.emit(record)

        return
Example #12
0
    def process(self, record, port):
        """Compute per-frame PBSA energies for the trajectory stored on ``record``.

        The record must list 'OETraj' in its analyses-done field. The ligand
        and protein trajectory molecules are read from the OETraj sub-record;
        when the ``explicit_water`` option is set, the water trajectory is
        merged into the protein trajectory conformer by conformer before
        ``mmpbsa.TrajPBSA`` is called. If 'TrajIntE' is also listed as done,
        MMPB and MMPBSA binding energies are derived from the stored
        interaction energies.

        The resulting dictionary is written to ``Fields.Analysis.oepbsa_dict``,
        'TrajPBSA' is appended to the analyses-done list and the record is
        emitted on the success port. Any exception is logged and the record
        is emitted on the failure port instead.

        Args:
            record: the input record to analyse.
            port: the port the record arrived on (not used here).
        """
        try:
            opt = self.opt
            # Logger string
            opt['Logger'].info(' Beginning TrajPBSACube')
            system_title = utl.RequestOEFieldType(record, Fields.title)
            opt['Logger'].info(
                '{} Attempting to compute MD Traj PBSA energies'.format(
                    system_title))

            # Check that the OETraj analysis has been done
            analysesDone = utl.RequestOEFieldType(record,
                                                  Fields.Analysis.analysesDone)
            if 'OETraj' not in analysesDone:
                raise ValueError(
                    '{} does not have OETraj analyses done'.format(
                        system_title))
            opt['Logger'].info(
                '{} found OETraj analyses'.format(system_title))

            # Extract the relevant traj OEMols from the OETraj record
            oetrajRecord = utl.RequestOEFieldType(record,
                                                  Fields.Analysis.oetraj_rec)
            opt['Logger'].info('{} found OETraj record'.format(system_title))
            ligTraj = utl.RequestOEField(oetrajRecord, 'LigTraj',
                                         Types.Chem.Mol)
            opt['Logger'].info(
                '{} #atoms, #confs in ligand traj OEMol: {}, {}'.format(
                    system_title, ligTraj.NumAtoms(), ligTraj.NumConfs()))

            mdtrajrecord = MDDataRecord(oetrajRecord)

            # The protein trajectory is needed with or without explicit water
            protTraj = mdtrajrecord.get_protein_traj

            if opt['explicit_water']:
                # Use the same checked accessor as for 'LigTraj' so that a
                # missing water trajectory is reported as a missing field
                # rather than failing later on a None value.
                water_traj = utl.RequestOEField(oetrajRecord, 'WatTraj',
                                                Types.Chem.Mol)
                opt['Logger'].info(
                    '{} #atoms, #confs in water traj OEMol: {}, {}'.format(
                        system_title, water_traj.NumAtoms(),
                        water_traj.NumConfs()))

                # Build a protein+water template from the active conformers,
                # then drop its conformers so they can be re-added per frame.
                prot_wat = oechem.OEMol(protTraj.GetActive())
                oechem.OEAddMols(prot_wat, water_traj.GetActive())
                prot_wat.DeleteConfs()

                n_coords = prot_wat.NumAtoms() * 3
                for pr_conf, wat_conf in zip(protTraj.GetConfs(),
                                             water_traj.GetConfs()):
                    pr_wat_conf = oechem.OEMol(pr_conf)
                    oechem.OEAddMols(pr_wat_conf, wat_conf)
                    pr_wat_conf_xyz = oechem.OEFloatArray(n_coords)
                    pr_wat_conf.GetCoords(pr_wat_conf_xyz)
                    prot_wat.NewConf(pr_wat_conf_xyz)

                protTraj = prot_wat

            opt['Logger'].info(
                '{} #atoms, #confs in protein traj OEMol: {}, {}'.format(
                    system_title, protTraj.NumAtoms(), protTraj.NumConfs()))

            # Compute PBSA energies for the protein-ligand complex
            PBSAdata = mmpbsa.TrajPBSA(ligTraj, protTraj)
            if PBSAdata is None:
                raise ValueError(
                    '{} Calculation of PBSA energies failed'.format(
                        system_title))

            # generate Surface Areas energy for buried SA based on 0.006 kcal/mol/A^2
            PBSAdata['OEZap_SA6_Bind'] = [
                sa * -0.006 for sa in PBSAdata['OEZap_BuriedArea']
            ]

            # If the OETraj Interaction Energies has been done calculate MMPBSA values
            if 'TrajIntE' in analysesDone:
                opt['Logger'].info(
                    '{} found TrajIntE analyses'.format(system_title))

                # Extract the relevant P-L Interaction Energies from the record
                intEdata = record.get_value(Fields.Analysis.oeintE_dict)
                opt['Logger'].info(
                    '{} found Traj intEdata data'.format(system_title))

                if opt['explicit_water']:
                    PLIntE = intEdata['protein_and_water_ligand_interE']
                    opt['Logger'].info(
                        '{} found Protein-Water and Ligand force field interaction energies'
                        .format(system_title))
                else:
                    PLIntE = intEdata['protein_ligand_interE']
                    opt['Logger'].info(
                        '{} found Protein-Ligand force field interaction energies'
                        .format(system_title))

                # Calculate and store MMPB and MMPBSA energies in the PBSA dict
                PBSAdata['OEZap_MMPB_Bind'] = [
                    eInt + eDesol for eInt, eDesol in zip(
                        PLIntE, PBSAdata['OEZap_PB_Desolvation'])
                ]
                PBSAdata['OEZap_MMPBSA6_Bind'] = [
                    eMMPB + eSA6
                    for eMMPB, eSA6 in zip(PBSAdata['OEZap_MMPB_Bind'],
                                           PBSAdata['OEZap_SA6_Bind'])
                ]

            # list field and change any NaNs to a really big float
            for key, values in PBSAdata.items():
                opt['Logger'].info(
                    '{} TrajPBSACube PBSAdata[{}] of length {}'.format(
                        system_title, key, len(values)))
                # change any NaNs to a really big float or else Orion WriterCube fails on JSON dict
                for i, x in enumerate(values):
                    if math.isnan(x):
                        opt['Logger'].info(
                            '{} found a NaN at PBSAdata[{}][{}]'.format(
                                system_title, key, i))
                        values[i] = magic_big_float_to_replace_NaN

            # Add the PBSAdata dict to the record
            record.set_value(Fields.Analysis.oepbsa_dict, PBSAdata)

            analysesDone.append('TrajPBSA')
            record.set_value(Fields.Analysis.analysesDone, analysesDone)
            opt['Logger'].info(
                '{} finished writing TrajPBSA OERecord'.format(system_title))

            self.success.emit(record)

            del mdtrajrecord

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            # self.opt is used here because the local ``opt`` may be unbound
            # if the failure happened on the very first statement.
            self.opt['Logger'].info('Exception {} in TrajPBSACube'.format(
                str(e)))
            self.log.error(traceback.format_exc())
            # Return failed mol
            self.failure.emit(record)

        return
Example #13
0
class Fields:
    """Namespace of the OEField definitions used on records.

    Each attribute is an ``OEField`` (name, type and optional metadata).
    Analysis-related fields are grouped in the nested ``Analysis`` class and
    free-energy-calculation fields in the nested ``FEC`` class.
    """

    # The LigInitialRecord Field is for the initial ligand record read in at the start
    ligInit_rec = OEField("LigInitial", Types.Record, meta=_metaHidden)

    # The Title field is a string name for the flask which is used to compose file names
    title = OEField("Title_OPLMD", Types.String, meta=_metaIDHidden)

    # The flaskid field is a unique integer for each flask (final system for simulation)
    flaskid = OEField("FlaskID_OPLMD", Types.Int, meta=_metaIDHidden)

    # The ligid field is a unique integer used to keep track of the ligand input order
    ligid = OEField("LigID_OPLMD", Types.Int, meta=_metaIDHidden)

    # The ConfID field is used to identify a particular conformer
    confid = OEField("ConfID_OPLMD", Types.Int, meta=_metaIDHidden)

    # The Ligand field should be used to save in a record a ligand as an OEMolecule
    ligand = OEField(
        "Ligand_OPLMD",
        Types.Chem.Mol,
        meta=OEFieldMeta(
            options=[Meta.Hints.Chem.Ligand, Meta.Display.Hidden]))

    # The ligand name
    ligand_name = OEField("Ligand_name_OPLMD", Types.String, meta=_metaHidden)

    # The protein field should be used to save in a record a Protein as an OEMolecule
    protein = OEField("Protein_OPLMD", Types.Chem.Mol, meta=_metaProtHidden)

    # The protein name
    protein_name = OEField("Protein_name_OPLMD",
                           Types.String,
                           meta=_metaHidden)

    # The super-molecule for the entire flask (ie the final system for simulation)
    flask = OEField("Flask_OPLMD", Types.Chem.Mol, meta=_metaHidden)

    # Primary Molecule
    primary_molecule = OEPrimaryMolField()

    # Parmed Structure, Trajectory, MDData and Protein trajectory conformers Fields.
    # The field names are the same in both branches but the field types differ:
    # inside Orion all four are Int fields, otherwise they are typed as
    # ParmedData / String / String / Mol respectively.
    # NOTE(review): the Int values are presumably IDs of data stored elsewhere
    # in Orion -- confirm against the code that reads these fields.
    if in_orion():
        pmd_structure = OEField('Structure_Parmed_OPLMD',
                                Types.Int,
                                meta=_metaHidden)
        trajectory = OEField("Trajectory_OPLMD", Types.Int, meta=_metaHidden)
        mddata = OEField("MDData_OPLMD", Types.Int, meta=_metaHidden)
        protein_traj_confs = OEField("ProtTraj_OPLMD",
                                     Types.Int,
                                     meta=_metaHidden)
    else:
        pmd_structure = OEField('Structure_Parmed_OPLMD',
                                ParmedData,
                                meta=_metaHidden)
        trajectory = OEField("Trajectory_OPLMD",
                             Types.String,
                             meta=_metaHidden)
        mddata = OEField("MDData_OPLMD", Types.String, meta=_metaHidden)
        protein_traj_confs = OEField("ProtTraj_OPLMD",
                                     Types.Chem.Mol,
                                     meta=_metaHidden)

    # The Stage Name
    stage_name = OEField('Stage_name_OPLMD', Types.String)

    # The Stage Type
    stage_type = OEField('Stage_type_OPLMD', Types.String)

    # Topology Field
    topology = OEField('Topology_OPLMD',
                       Types.Chem.Mol,
                       meta=OEFieldMeta().set_option(
                           Meta.Hints.Chem.PrimaryMol))

    # Log Info
    log_data = OEField('Log_data_OPLMD', Types.String)

    # MD State
    md_state = OEField("MDState_OPLMD", MDStateData)

    # Design Unit Field
    design_unit = OEField('Design_Unit_OPLMD', DesignUnit)

    # Design Unit Field from Spruce
    # design_unit_from_spruce = OEField('du_single', Types.Blob)
    design_unit_from_spruce = OEField('designunit', Types.Chem.DesignUnit)

    # MD Components
    md_components = OEField('MDComponents_OPLMD', MDComponentData)

    # Collection is used to offload data from the record which must be < 100Mb
    collection = OEField("Collection_ID_OPLMD", Types.Int, meta=_metaHidden)

    # Stage list Field
    md_stages = OEField("MDStages_OPLMD", Types.RecordVec, meta=_metaHidden)

    # Floe report fields: report string, SVG ligand depiction, label, URL
    # and an integer collection id
    floe_report = OEField('Floe_report_OPLMD', Types.String, meta=_metaHidden)

    floe_report_svg_lig_depiction = OEField("Floe_report_lig_svg_OPLMD",
                                            Types.String,
                                            meta=OEFieldMeta().set_option(
                                                Meta.Hints.Image_SVG))

    floe_report_label = OEField('Floe_report_label_OPLMD',
                                Types.String,
                                meta=_metaHidden)

    floe_report_URL = OEField('Floe_report_URL_OPLMD',
                              Types.String,
                              meta=OEFieldMeta(options=[Meta.Hints.URL]))

    floe_report_collection_id = OEField('Floe_report_ID_OPLMD',
                                        Types.Int,
                                        meta=_metaHidden)

    class Analysis:
        """Fields written and read by the trajectory analysis steps."""

        # The poseIdVec vector addresses an input poseid for each traj frame
        poseIdVec = OEField("PoseIdVec", Types.IntVec, meta=_metaHidden)

        # The OETraj Field is for the record containing Traj OEMols and energies
        oetraj_rec = OEField("OETraj", Types.Record, meta=_metaHidden)

        # The TrajIntE Field is for the record containing Traj interaction energies
        oeintE_rec = OEField("TrajIntE", Types.Record, meta=_metaHidden)

        # The TrajIntEDict Field is for the POD Dictionary containing Traj interaction energies
        oeintE_dict = OEField("TrajIntEDict",
                              Types.JSONObject,
                              meta=_metaHidden)

        # The TrajPBSA Field is for the record containing Traj PBSA energies
        oepbsa_rec = OEField("TrajPBSA", Types.Record, meta=_metaHidden)

        # The TrajPBSADict Field is for the POD Dictionary containing Traj PBSA energies
        oepbsa_dict = OEField("TrajPBSADict",
                              Types.JSONObject,
                              meta=_metaHidden)

        # The TrajClus Field is for the record containing Traj ligand clustering results
        oeclus_rec = OEField("TrajClus", Types.Record, meta=_metaHidden)

        # The TrajClusDict Field is for the POD Dictionary containing Traj ligand clustering results
        oeclus_dict = OEField("TrajClusDict",
                              Types.JSONObject,
                              meta=_metaHidden)

        # The ClusPopDict Field is for the POD Dictionary containing conf/cluster population results
        cluspop_dict = OEField("ClusPopDict",
                               Types.JSONObject,
                               meta=_metaHidden)

        # The AnalysesDone Field is for a list of the analyses that have been done
        analysesDone = OEField("AnalysesDone",
                               Types.StringVec,
                               meta=_metaHidden)

        # The Lig_Conf_Data Field is for the record containing Traj conf data for all confs
        oetrajconf_rec = OEField("Lig_Conf_Data",
                                 Types.RecordVec,
                                 meta=_metaHidden)

        # The vector of ligand Traj RMSDs from the initial pose
        lig_traj_rmsd = OEField('LigTrajRMSD',
                                Types.FloatVec,
                                meta=OEFieldMeta().set_option(
                                    Meta.Units.Length.Ang))

        # The mmpbsa Field contains the vector of per-frame mmpbsa values over the whole trajectory
        # NOTE(review): the unit here is Meta.Units.Energy.kCal while the mean
        # and standard-error fields below use kCal_per_mol -- confirm intended.
        zapMMPBSA_fld = OEField("OEZap_MMPBSA6_Bind",
                                Types.FloatVec,
                                meta=OEFieldMeta().set_option(
                                    Meta.Units.Energy.kCal))

        # mmpbsa ensemble average over the whole trajectory
        mmpbsa_traj_mean = OEField('MMPBSATrajMean',
                                   Types.Float,
                                   meta=OEFieldMeta().set_option(
                                       Meta.Units.Energy.kCal_per_mol))

        # Metadata relating the standard-error field to mmpbsa_traj_mean
        # through the ErrorsFor relation
        metaMMPBSA_traj_serr = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaMMPBSA_traj_serr.add_relation(Meta.Relations.ErrorsFor,
                                          mmpbsa_traj_mean)
        mmpbsa_traj_serr = OEField('MMPBSATrajSerr',
                                   Types.Float,
                                   meta=metaMMPBSA_traj_serr)

        # The number of major clusters found
        n_major_clusters = OEField("n major clusters", Types.Int)

        # Trajectory cluster averages and medians of protein and ligand
        ClusLigAvg_fld = OEField('ClusLigAvgMol', Types.Chem.Mol)
        ClusProtAvg_fld = OEField('ClusProtAvgMol', Types.Chem.Mol)
        ClusLigMed_fld = OEField('ClusLigMedMol', Types.Chem.Mol)
        ClusProtMed_fld = OEField('ClusProtMedMol', Types.Chem.Mol)

        # Integer field; NOTE(review): presumably the maximum number of waters
        # kept in the analysis -- confirm against the code that sets it.
        max_waters = OEField("MaxWaters_OPLMD", Types.Int, meta=_metaHidden)

        # Free Energy Yank
        # Analysis Fields
        # NOTE(review): FEC.free_energy / FEC.free_energy_err below are defined
        # with the same field names ('FE_OPLMD', 'FE_Error_OPLMD').
        free_energy = OEField('FE_OPLMD',
                              Types.Float,
                              meta=OEFieldMeta().set_option(
                                  Meta.Units.Energy.kCal_per_mol))

        # Metadata relating the error field to free_energy through ErrorsFor
        metaFreeEnergy_err = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaFreeEnergy_err.add_relation(Meta.Relations.ErrorsFor, free_energy)
        free_energy_err = OEField('FE_Error_OPLMD',
                                  Types.Float,
                                  meta=metaFreeEnergy_err)

    class FEC:
        """Fields used by the free energy calculations."""

        # Free Energy
        free_energy = OEField('FE_OPLMD',
                              Types.Float,
                              meta=OEFieldMeta().set_option(
                                  Meta.Units.Energy.kCal_per_mol))

        # Metadata relating the error field to free_energy through ErrorsFor
        metaFreeEnergy_err = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaFreeEnergy_err.add_relation(Meta.Relations.ErrorsFor, free_energy)
        free_energy_err = OEField('FE_Error_OPLMD',
                                  Types.Float,
                                  meta=metaFreeEnergy_err)

        class RBFEC:
            # Oriented Edge field for relative free energy calculations
            # The first integer of the list is the ligand ID of the starting
            # thermodynamic state and the second the final one
            # NOTE(review): the comment above speaks of a list but the field
            # is declared as Types.Int -- confirm which is intended.
            edgeid = OEField("EdgeID_OPLMD", Types.Int, meta=_metaHidden)
            edge_name = OEField("EdgeName_OPLMD", Types.String)

            # The Thermodynamics leg type is used for Bound and
            # UnBound State run identification
            thd_leg_type = OEField("Thd_Leg_OPLMD",
                                   Types.String,
                                   meta=_metaHidden)

            class NESC:

                # Records for the two end states (A and B)
                state_A = OEField("StateA_OPLMD", Types.Record)
                state_B = OEField("StateB_OPLMD", Types.Record)

                # String fields; NOTE(review): presumably the Gromacs topology
                # and coordinate (.gro) data -- confirm against the writer.
                gmx_top = OEField("GMX_Top_OPLMD",
                                  Types.String,
                                  meta=_metaHidden)
                gmx_gro = OEField("GMX_Gro_OPLMD",
                                  Types.String,
                                  meta=_metaHidden)
                # Work value, tagged with kJ/mol units
                work = OEField("GMX_Work_OPLMD",
                               Types.Float,
                               meta=OEFieldMeta().set_option(
                                   Meta.Units.Energy.kJ_per_mol))
                frame_count = OEField("frame_count",
                                      Types.Int,
                                      meta=_metaHidden)

                # The Work record is used to collect the data related to the
                # Work Forward and Reverse for the Bound and Unbound States
                work_rec = OEField("Work_Record_OPLMD", Types.Record)

                # The Relative Binding Affinity record collects data for the
                # different analysis methods used to compute it
                DDG_rec = OEField("DDG_Record_OPLMD", Types.Record)
Example #14
0
    class Analysis:
        """Fields written and read by the trajectory analysis steps.

        Each attribute is an ``OEField`` (name, type and optional metadata).
        """

        # The poseIdVec vector addresses an input poseid for each traj frame
        poseIdVec = OEField("PoseIdVec", Types.IntVec, meta=_metaHidden)

        # The OETraj Field is for the record containing Traj OEMols and energies
        oetraj_rec = OEField("OETraj", Types.Record, meta=_metaHidden)

        # The TrajIntE Field is for the record containing Traj interaction energies
        oeintE_rec = OEField("TrajIntE", Types.Record, meta=_metaHidden)

        # The TrajIntEDict Field is for the POD Dictionary containing Traj interaction energies
        oeintE_dict = OEField("TrajIntEDict",
                              Types.JSONObject,
                              meta=_metaHidden)

        # The TrajPBSA Field is for the record containing Traj PBSA energies
        oepbsa_rec = OEField("TrajPBSA", Types.Record, meta=_metaHidden)

        # The TrajPBSADict Field is for the POD Dictionary containing Traj PBSA energies
        oepbsa_dict = OEField("TrajPBSADict",
                              Types.JSONObject,
                              meta=_metaHidden)

        # The TrajClus Field is for the record containing Traj ligand clustering results
        oeclus_rec = OEField("TrajClus", Types.Record, meta=_metaHidden)

        # The TrajClusDict Field is for the POD Dictionary containing Traj ligand clustering results
        oeclus_dict = OEField("TrajClusDict",
                              Types.JSONObject,
                              meta=_metaHidden)

        # The ClusPopDict Field is for the POD Dictionary containing conf/cluster population results
        cluspop_dict = OEField("ClusPopDict",
                               Types.JSONObject,
                               meta=_metaHidden)

        # The AnalysesDone Field is for a list of the analyses that have been done
        analysesDone = OEField("AnalysesDone",
                               Types.StringVec,
                               meta=_metaHidden)

        # The Lig_Conf_Data Field is for the record containing Traj conf data for all confs
        oetrajconf_rec = OEField("Lig_Conf_Data",
                                 Types.RecordVec,
                                 meta=_metaHidden)

        # The vector of ligand Traj RMSDs from the initial pose
        lig_traj_rmsd = OEField('LigTrajRMSD',
                                Types.FloatVec,
                                meta=OEFieldMeta().set_option(
                                    Meta.Units.Length.Ang))

        # The mmpbsa Field contains the vector of per-frame mmpbsa values over the whole trajectory
        # NOTE(review): the unit here is Meta.Units.Energy.kCal while the mean
        # and standard-error fields below use kCal_per_mol -- confirm intended.
        zapMMPBSA_fld = OEField("OEZap_MMPBSA6_Bind",
                                Types.FloatVec,
                                meta=OEFieldMeta().set_option(
                                    Meta.Units.Energy.kCal))

        # mmpbsa ensemble average over the whole trajectory
        mmpbsa_traj_mean = OEField('MMPBSATrajMean',
                                   Types.Float,
                                   meta=OEFieldMeta().set_option(
                                       Meta.Units.Energy.kCal_per_mol))

        # Metadata relating the standard-error field to mmpbsa_traj_mean
        # through the ErrorsFor relation
        metaMMPBSA_traj_serr = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaMMPBSA_traj_serr.add_relation(Meta.Relations.ErrorsFor,
                                          mmpbsa_traj_mean)
        mmpbsa_traj_serr = OEField('MMPBSATrajSerr',
                                   Types.Float,
                                   meta=metaMMPBSA_traj_serr)

        # The number of major clusters found
        n_major_clusters = OEField("n major clusters", Types.Int)

        # Trajectory cluster averages and medians of protein and ligand
        ClusLigAvg_fld = OEField('ClusLigAvgMol', Types.Chem.Mol)
        ClusProtAvg_fld = OEField('ClusProtAvgMol', Types.Chem.Mol)
        ClusLigMed_fld = OEField('ClusLigMedMol', Types.Chem.Mol)
        ClusProtMed_fld = OEField('ClusProtMedMol', Types.Chem.Mol)

        # Integer field; NOTE(review): presumably the maximum number of waters
        # kept in the analysis -- confirm against the code that sets it.
        max_waters = OEField("MaxWaters_OPLMD", Types.Int, meta=_metaHidden)

        # Free Energy Yank
        # Analysis Fields
        free_energy = OEField('FE_OPLMD',
                              Types.Float,
                              meta=OEFieldMeta().set_option(
                                  Meta.Units.Energy.kCal_per_mol))

        # Metadata relating the error field to free_energy through ErrorsFor
        metaFreeEnergy_err = OEFieldMeta().set_option(
            Meta.Units.Energy.kCal_per_mol)
        metaFreeEnergy_err.add_relation(Meta.Relations.ErrorsFor, free_energy)
        free_energy_err = OEField('FE_Error_OPLMD',
                                  Types.Float,
                                  meta=metaFreeEnergy_err)
Example #15
0
class PersesCube(RecordPortsMixin, ComputeCube):
    # Cube documentation.  This documentation for this cube, and all other cubes in this repository, can be converted
    # to html by calling 'invoke docs' from the root directory of this repository.  This documentation will also
    # appear in the Orion Floe editor.
    title = "Perses relative free energy calculations cube"
    classification = [["Free Energy"]]
    tags = [
        "Ligand", "Protein", "Free Energy", "Perses",
        "Relative Binding Free Energy", "Alchemical"
    ]
    description = """
    Compute a relative binding free energy using Perses.

    This cube uses Perses to perform a relative alchemical free energy calculation.

    See https://github.com/choderalab/perses for more information about perses.
    """
    uuid = "0fc3762d-35cc-4704-b4c3-820321d98040"

    # Override defaults for some parameters
    parameter_overrides = {
        "gpu_count": {
            "default": 1
        },
        "memory_mb": {
            "default": 6000
        },
        "spot_policy": {
            "default": "Prohibited"
        },  # TODO: Figure out how to allow spot policy
        #"spot_policy": {"default": "Allowed"}, # TODO: Write code to resume automatically
        "prefetch_count": {
            "default": 1
        },  # 1 molecule at a time
        "item_count": {
            "default": 1
        }  # 1 molecule at a time
    }

    # TODO: Automatically update these
    AVAILABLE_PROTEIN_FORCEFIELDS = [
        'amber/protein.ff14SB.xml',
        'amber/ff99SBildn.xml',
    ]

    # TODO: Automatically update these
    AVAILABLE_LIGAND_FORCEFIELDS = [
        'openff-1.0.0',
        'smirnoff99Frosst-1.1.0',
        'gaff-2.11',
        'gaff-1.81',
    ]

    AVAILABLE_SOLVENT_FORCEFIELDS = [
        'amber/tip3p_standard.xml',
        'amber/tip3pfb_standard.xml',
    ]

    protein_forcefield = StringParameter(
        'protein_forcefield',
        default=AVAILABLE_PROTEIN_FORCEFIELDS[0],
        choices=AVAILABLE_PROTEIN_FORCEFIELDS,
        help_text='Force field parameters to be applied to the protein')

    solvent_forcefield = StringParameter(
        'solvent_forcefield',
        default=AVAILABLE_SOLVENT_FORCEFIELDS[0],
        choices=AVAILABLE_SOLVENT_FORCEFIELDS,
        help_text='Force field parameters to be applied to water and ions')

    ligand_forcefield = StringParameter(
        'ligand_forcefield',
        default=AVAILABLE_LIGAND_FORCEFIELDS[0],
        choices=AVAILABLE_LIGAND_FORCEFIELDS,
        help_text='Force field to be applied to the ligand')

    temperature = StringParameter('temperature',
                                  default=300.0,
                                  help_text="Temperature (Kelvin)")

    temperature = DecimalParameter('temperature',
                                   default=300.0,
                                   help_text="Temperature (Kelvin)")

    pressure = DecimalParameter('pressure',
                                default=1.0,
                                help_text="Pressure (atm)")

    n_iterations = IntegerParameter('n_iterations',
                                    default=5000,
                                    help_text="Total number of iterations")

    n_steps_per_iteration = IntegerParameter(
        'n_steps_per_iteration',
        default=250,
        help_text="Number of MD steps per iteration")

    checkpoint_interval = IntegerParameter(
        'checkpoint_interval',
        default=500,
        help_text="Full checkpoint interval (iterations)")

    timestep = DecimalParameter('timestep',
                                default=4.0,
                                help_text="Timestep (fs)")

    hmr = BooleanParameter(
        'hmr',
        default=True,
        description='On enables Hydrogen Mass Repartitioning (HMR)')

    suffix = StringParameter(
        'suffix',
        default='perses',
        help_text='Filename suffix for output simulation files')

    nonbonded_method = StringParameter('nonbonded_method',
                                       default='PME',
                                       choices=['PME', 'CutoffPeriodic'],
                                       help_text='Nonbonded method to use')

    solvent_padding = DecimalParameter('solvent_padding',
                                       default=9.0,
                                       help_text="Solvent padding (A)")

    n_states = IntegerParameter(
        'n_states',
        default=11,
        help_text='Number of alchemical intermediate states')

    vacuum_test = BooleanParameter(
        'vacuum_test',
        default=False,
        help_text='If True, just run a quick test in vacuum')

    # Ports
    protein_port = RecordInputPort("protein_port", initializer=True)
    reference_ligand_port = RecordInputPort("reference_ligand_port",
                                            initializer=True)
    target_ligands_port = RecordInputPort("target_ligands_port")

    # Fields
    log_field = OEField("log_field", Types.String)
    DDG_field = OEField("DDG_field", Types.Float)
    dDDG_field = OEField("dDDG_field", Types.Float)

    def begin(self):
        # Log OpenMM version
        from simtk import openmm
        self.log.info(f'OpenMM version: {openmm.version.version}')

        # Retrieve receptor
        mols = [
            record.get_value(OEPrimaryMolField())
            for record in self.protein_port
        ]
        if len(mols) != 1:
            raise Exception(f'{len(mols)} molecules found on protein_port')
        self._receptor = mols[0]
        self.log.info(f'Receptor: {self._receptor.NumAtoms()}')

        # Retrieve reference ligand
        mols = [
            record.get_value(OEPrimaryMolField())
            for record in self.reference_ligand_port
        ]
        if len(mols) != 1:
            raise Exception(
                f'{len(mols)} molecules found on reference_ligand_port')
        self._reference_ligand = mols[0]
        self.log.info(f'Reference ligand: {self._reference_ligand.NumAtoms()}')

        # Create YAML file
        self.log.info('Creating YAML file...')
        setup_options = dict()
        if self.args.vacuum_test:
            setup_options['phases'] = ['vacuum']
        else:
            setup_options['phases'] = ['solvent', 'complex']
        setup_options['protein_pdb'] = 'receptor.pdb'
        setup_options['ligand_file'] = 'ligands.sdf'
        setup_options['old_ligand_index'] = 0
        setup_options['new_ligand_index'] = 1
        setup_options['forcefield_files'] = [
            self.args.protein_forcefield, self.args.solvent_forcefield
        ]
        setup_options['temperature'] = self.args.temperature
        setup_options['pressure'] = self.args.pressure
        setup_options[
            'small_molecule_forcefield'] = self.args.ligand_forcefield
        setup_options['atom_expression'] = ['IntType']
        setup_options[
            'n_steps_per_move_application'] = self.args.n_steps_per_iteration
        setup_options['fe_type'] = 'repex'
        setup_options['checkpoint_interval'] = self.args.checkpoint_interval
        setup_options['n_cycles'] = self.args.n_iterations
        setup_options['n_states'] = self.args.n_states
        setup_options['n_equilibration_iterations'] = 0
        setup_options['trajectory_directory'] = 'lig0to1'
        setup_options['trajectory_prefix'] = 'out'
        setup_options['atom_selection'] = 'not water'
        setup_options['timestep'] = self.args.timestep
        setup_options['solvent_padding'] = self.args.solvent_padding
        setup_options['save_setup_pickle_as'] = 'out.pkl'

        self.log.info('Writing YAML file...')
        import os
        self.yaml_filename = os.path.abspath('perses.yaml')
        with open(self.yaml_filename, 'w') as output:
            yaml.dump(setup_options, output)
            self.log.info(yaml.dump(setup_options))

    def process(self, record, port):
        """Run a perses relative calculation for the molecule on ``record``.

        The primary molecule of the record is given 3D coordinates with
        OMEGA, written next to the stored reference ligand and receptor in a
        temporary working directory, and handed to perses using the YAML
        file at ``self.yaml_filename``. The resulting free energy estimate
        and its uncertainty are stored on the record in ``self.DDG_field``
        and ``self.dDDG_field`` before it is emitted on the success port.

        Records without a primary molecule, or for which OMEGA fails, get a
        message in ``self.args.log_field`` and are emitted on the failure
        port only.

        Parameters
        ----------
        record
            Incoming data record; read via ``has_value``/``get_value`` and
            updated via ``set_value``.
        port
            Name of the port the record arrived on (unused).
        """
        # Make sure we have a molecule defined
        if not record.has_value(OEPrimaryMolField()):
            record.set_value(self.args.log_field,
                             'Record is missing an input molecule field')
            self.failure.emit(record)
            # Stop here: there is nothing to process, and the record must not
            # also be emitted on the success port.
            return
        mol = record.get_value(OEPrimaryMolField())

        # Report which compound we are processing
        from openeye import oechem
        smiles = oechem.OEMolToSmiles(mol)
        self.log.info(f"Processing compound {smiles}")

        # Generate arbitrary 3D coordinates for target ligand
        from openeye import oeomega
        omegaOpts = oeomega.OEOmegaOptions()
        omega = oeomega.OEOmega(omegaOpts)
        ret_code = omega.Build(mol)
        if ret_code != oeomega.OEOmegaReturnCode_Success:
            record.set_value(self.args.log_field,
                             oeomega.OEGetOmegaError(ret_code))
            self.failure.emit(record)
            # Without coordinates the calculation cannot be set up.
            return

        from tempfile import TemporaryDirectory
        import os
        cwd = os.getcwd()
        with TemporaryDirectory() as tmpdir:
            self.log.info(f"Entering temporary directory {tmpdir}")
            os.chdir(tmpdir)
            try:
                # Set up perses calculation
                from perses.app.setup_relative_calculation import getSetupOptions, run_setup, run
                self.log.info("Loading setup options...")
                setup_options = getSetupOptions(self.yaml_filename)
                self.log.info(str(setup_options))

                # Prepare input for perses
                # TODO: Use tempdir in future for filesystem reasons
                # NOTE: logging calls do not take a ``flush`` keyword (that
                # is a ``print`` argument), so it is not passed here.
                self.log.info("Writing receptor...")
                protein_pdb_filename = 'receptor.pdb'
                with oechem.oemolostream(protein_pdb_filename) as ofs:
                    oechem.OEWriteMolecule(ofs, self._receptor)
                self.log.info("Writing ligands...")
                ligands_sdf_filename = 'ligands.sdf'
                with oechem.oemolostream(ligands_sdf_filename) as ofs:
                    oechem.OEWriteMolecule(ofs,
                                           self._reference_ligand)  # molecule 0
                    oechem.OEWriteMolecule(ofs, mol)  # molecule 1

                self.log.info("Setting up perses calculation...")
                run_setup(setup_options)

                self.log.info("Running calculations...")
                run(self.yaml_filename)

                # Analyze the data
                self.log.info("Analyzing calculations...")
                from perses.analysis.load_simulations import Simulation
                simulation = Simulation(0, 1)
                simulation.load_data()
            finally:
                # Always leave the temporary directory before it is removed,
                # even if perses raised, so the process is not left with a
                # deleted working directory.
                os.chdir(cwd)

        # Set output molecule information
        # TODO: Store trajectory or final snapshots
        phases = setup_options['phases']
        if ('complex' in phases) and ('solvent' in phases):
            self.log.info(
                f"DDG = {simulation.bindingdg} +- {simulation.bindingddg} kcal/mol..."
            )
            record.set_value(self.DDG_field, simulation.bindingdg)
            record.set_value(self.dDDG_field, simulation.bindingddg)
        elif 'vacuum' in phases:
            self.log.info(
                f"DDG(vacuum) = {simulation.vacdg} +- {simulation.vacddg} kcal/mol..."
            )
            record.set_value(self.DDG_field, simulation.vacdg)
            record.set_value(self.dDDG_field, simulation.vacddg)
        self.success.emit(record)

    # Hook for cleaning up the cube at the end of the run (currently a no-op)
    def end(self):
        """End-of-run hook; performs no cleanup at present."""
        # TODO: Clean up?
        return None