def run_setup(setup_options, serialize_systems=True, build_samplers=True):
    """
    Run the setup pipeline and return the relevant setup objects based on a yaml input file.
    Parameters
    ----------
    setup_options : dict
        result of loading yaml input file
    Returns
    -------
    setup_dict: dict
        {'topology_proposals': top_prop, 'hybrid_topology_factories': htf, 'hybrid_samplers': hss}
        - 'topology_proposals':
    """
    phases = setup_options['phases']
    known_phases = ['complex', 'solvent', 'vacuum']
    for phase in phases:
        assert (
            phase in known_phases
        ), f"Unknown phase, {phase} provided. run_setup() can be used with {known_phases}"

    if 'use_given_geometries' not in list(setup_options.keys()):
        use_given_geometries = False
    else:
        assert type(setup_options['use_given_geometries']) == type(True)
        use_given_geometries = setup_options['use_given_geometries']

    if 'complex' in phases:
        _logger.info(f"\tPulling receptor (as pdb or mol2)...")
        # We'll need the protein PDB file (without missing atoms)
        try:
            protein_pdb_filename = setup_options['protein_pdb']
            assert protein_pdb_filename is not None
            receptor_mol2 = None
        except KeyError:
            try:
                receptor_mol2 = setup_options['receptor_mol2']
                assert receptor_mol2 is not None
                protein_pdb_filename = None
            except KeyError as e:
                print(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )
                raise e
    else:
        protein_pdb_filename = None
        receptor_mol2 = None

    # And a ligand file containing the pair of ligands between which we will transform
    ligand_file = setup_options['ligand_file']
    _logger.info(f"\tdetected ligand file: {ligand_file}")

    # get the indices of ligands out of the file:
    old_ligand_index = setup_options['old_ligand_index']
    new_ligand_index = setup_options['new_ligand_index']
    _logger.info(
        f"\told ligand index: {old_ligand_index}; new ligand index: {new_ligand_index}"
    )

    _logger.info(f"\tsetting up forcefield files...")
    forcefield_files = setup_options['forcefield_files']

    if "timestep" in setup_options:
        if isinstance(setup_options['timestep'], float):
            timestep = setup_options['timestep'] * unit.femtoseconds
        else:
            timestep = setup_options['timestep']
        _logger.info(f"\ttimestep: {timestep}.")
    else:
        timestep = 1.0 * unit.femtoseconds
        _logger.info(f"\tno timestep detected: setting default as 1.0fs.")

    if "neq_splitting" in setup_options:
        neq_splitting = setup_options['neq_splitting']
        _logger.info(f"\tneq_splitting: {neq_splitting}")

        try:
            eq_splitting = setup_options['eq_splitting']
            _logger.info(f"\teq_splitting: {eq_splitting}")
        except KeyError as e:
            print(
                "If you specify a nonequilibrium splitting string, you must also specify an equilibrium one."
            )
            raise e

    else:
        eq_splitting = "V R O R V"
        neq_splitting = "V R O R V"
        _logger.info(
            f"\tno splitting strings specified: defaulting to neq: {neq_splitting}, eq: {eq_splitting}."
        )

    if "measure_shadow_work" in setup_options:
        measure_shadow_work = setup_options['measure_shadow_work']
        _logger.info(f"\tmeasuring shadow work: {measure_shadow_work}.")
    else:
        measure_shadow_work = False
        _logger.info(
            f"\tno measure_shadow_work specified: defaulting to False.")
    if isinstance(setup_options['pressure'], float):
        pressure = setup_options['pressure'] * unit.atmosphere
    else:
        pressure = setup_options['pressure']
    if isinstance(setup_options['temperature'], float):
        temperature = setup_options['temperature'] * unit.kelvin
    else:
        temperature = setup_options['temperature']
    if isinstance(setup_options['solvent_padding'], float):
        solvent_padding_angstroms = setup_options[
            'solvent_padding'] * unit.angstrom
    else:
        solvent_padding_angstroms = setup_options['solvent_padding']
    if isinstance(setup_options['ionic_strength'], float):
        ionic_strength = setup_options['ionic_strength'] * unit.molar
    else:
        ionic_strength = setup_options['ionic_strength']
    _logger.info(f"\tsetting pressure: {pressure}.")
    _logger.info(f"\tsetting temperature: {temperature}.")
    _logger.info(f"\tsetting solvent padding: {solvent_padding_angstroms}A.")
    _logger.info(f"\tsetting ionic strength: {ionic_strength}M.")

    setup_pickle_file = setup_options[
        'save_setup_pickle_as'] if 'save_setup_pickle_as' in list(
            setup_options) else None
    _logger.info(f"\tsetup pickle file: {setup_pickle_file}")
    trajectory_directory = setup_options['trajectory_directory']
    _logger.info(f"\ttrajectory directory: {trajectory_directory}")
    try:
        atom_map_file = setup_options['atom_map']
        with open(atom_map_file, 'r') as f:
            atom_map = {
                int(x.split()[0]): int(x.split()[1])
                for x in f.readlines()
            }
        _logger.info(f"\tsucceeded parsing atom map.")
    except Exception:
        atom_map = None
        _logger.info(f"\tno atom map specified: default to None.")

    if 'topology_proposal' not in list(setup_options.keys(
    )) or setup_options['topology_proposal'] is None:
        _logger.info(
            f"\tno topology_proposal specified; proceeding to RelativeFEPSetup...\n\n\n"
        )
        if 'set_solvent_box_dims_to_complex' in list(setup_options.keys(
        )) and setup_options['set_solvent_box_dims_to_complex']:
            set_solvent_box_dims_to_complex = True
        else:
            set_solvent_box_dims_to_complex = False

        _logger.info(
            f'Box dimensions: {setup_options["complex_box_dimensions"]} and {setup_options["solvent_box_dimensions"]}'
        )
        fe_setup = RelativeFEPSetup(
            ligand_file,
            old_ligand_index,
            new_ligand_index,
            forcefield_files,
            phases=phases,
            protein_pdb_filename=protein_pdb_filename,
            receptor_mol2_filename=receptor_mol2,
            pressure=pressure,
            temperature=temperature,
            solvent_padding=solvent_padding_angstroms,
            spectator_filenames=setup_options['spectators'],
            map_strength=setup_options['map_strength'],
            atom_expr=setup_options['atom_expr'],
            bond_expr=setup_options['bond_expr'],
            atom_map=atom_map,
            neglect_angles=setup_options['neglect_angles'],
            anneal_14s=setup_options['anneal_1,4s'],
            small_molecule_forcefield=setup_options[
                'small_molecule_forcefield'],
            small_molecule_parameters_cache=setup_options[
                'small_molecule_parameters_cache'],
            trajectory_directory=trajectory_directory,
            trajectory_prefix=setup_options['trajectory_prefix'],
            nonbonded_method=setup_options['nonbonded_method'],
            complex_box_dimensions=setup_options['complex_box_dimensions'],
            solvent_box_dimensions=setup_options['solvent_box_dimensions'],
            ionic_strength=ionic_strength,
            remove_constraints=setup_options['remove_constraints'],
            use_given_geometries=use_given_geometries)

        _logger.info(f"\twriting pickle output...")
        if setup_pickle_file is not None:
            with open(
                    os.path.join(os.getcwd(), trajectory_directory,
                                 setup_pickle_file), 'wb') as f:
                try:
                    pickle.dump(fe_setup, f)
                    _logger.info(f"\tsuccessfully dumped pickle.")
                except Exception as e:
                    print(e)
                    print("\tUnable to save setup object as a pickle")

            _logger.info(
                f"\tsetup is complete.  Writing proposals and positions for each phase to top_prop dict..."
            )
        else:
            _logger.info(
                f"\tsetup is complete.  Omitted writing proposals and positions for each phase to top_prop dict..."
            )

        top_prop = dict()
        for phase in phases:
            top_prop[f'{phase}_topology_proposal'] = getattr(
                fe_setup, f'{phase}_topology_proposal')
            top_prop[f'{phase}_geometry_engine'] = getattr(
                fe_setup, f'_{phase}_geometry_engine')
            top_prop[f'{phase}_old_positions'] = getattr(
                fe_setup, f'{phase}_old_positions')
            top_prop[f'{phase}_new_positions'] = getattr(
                fe_setup, f'{phase}_new_positions')
            top_prop[f'{phase}_added_valence_energy'] = getattr(
                fe_setup, f'_{phase}_added_valence_energy')
            top_prop[f'{phase}_subtracted_valence_energy'] = getattr(
                fe_setup, f'_{phase}_subtracted_valence_energy')
            top_prop[f'{phase}_logp_proposal'] = getattr(
                fe_setup, f'_{phase}_logp_proposal')
            top_prop[f'{phase}_logp_reverse'] = getattr(
                fe_setup, f'_{phase}_logp_reverse')
            top_prop[f'{phase}_forward_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_forward_neglected_angles')
            top_prop[f'{phase}_reverse_neglected_angles'] = getattr(
                fe_setup, f'_{phase}_reverse_neglected_angles')

        top_prop['ligand_oemol_old'] = fe_setup._ligand_oemol_old
        top_prop['ligand_oemol_new'] = fe_setup._ligand_oemol_new
        top_prop[
            'non_offset_new_to_old_atom_map'] = fe_setup.non_offset_new_to_old_atom_map
        _logger.info(f"\twriting atom_mapping.png")
        atom_map_outfile = os.path.join(os.getcwd(), trajectory_directory,
                                        'atom_mapping.png')

        if 'render_atom_map' in list(
                setup_options.keys()) and setup_options['render_atom_map']:
            render_atom_mapping(atom_map_outfile, fe_setup._ligand_oemol_old,
                                fe_setup._ligand_oemol_new,
                                fe_setup.non_offset_new_to_old_atom_map)

    else:
        _logger.info(f"\tloading topology proposal from yaml setup options...")
        top_prop = np.load(setup_options['topology_proposal']).item()

    n_steps_per_move_application = setup_options[
        'n_steps_per_move_application']
    _logger.info(
        f"\t steps per move application: {n_steps_per_move_application}")
    trajectory_directory = setup_options['trajectory_directory']

    trajectory_prefix = setup_options['trajectory_prefix']
    _logger.info(f"\ttrajectory prefix: {trajectory_prefix}")

    if 'atom_selection' in setup_options:
        atom_selection = setup_options['atom_selection']
        _logger.info(f"\tatom selection detected: {atom_selection}")
    else:
        _logger.info(f"\tno atom selection detected: default to all.")
        atom_selection = 'all'

    if setup_options['fe_type'] == 'neq':
        _logger.info(f"\tInstantiating nonequilibrium switching FEP")
        n_equilibrium_steps_per_iteration = setup_options[
            'n_equilibrium_steps_per_iteration']
        ncmc_save_interval = setup_options['ncmc_save_interval']
        write_ncmc_configuration = setup_options['write_ncmc_configuration']
        if setup_options['LSF']:
            _internal_parallelism = {
                'library': ('dask', 'LSF'),
                'num_processes': setup_options['processes']
            }
        else:
            _internal_parallelism = None

        ne_fep = dict()
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}")
            hybrid_factory = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

            if build_samplers:
                ne_fep[phase] = SequentialMonteCarlo(
                    factory=hybrid_factory,
                    lambda_protocol=setup_options['lambda_protocol'],
                    temperature=temperature,
                    trajectory_directory=trajectory_directory,
                    trajectory_prefix=f"{trajectory_prefix}_{phase}",
                    atom_selection=atom_selection,
                    timestep=timestep,
                    eq_splitting_string=eq_splitting,
                    neq_splitting_string=neq_splitting,
                    collision_rate=setup_options['ncmc_collision_rate_ps'],
                    ncmc_save_interval=ncmc_save_interval,
                    internal_parallelism=_internal_parallelism)

        print("Nonequilibrium switching driver class constructed")

        return {'topology_proposals': top_prop, 'ne_fep': ne_fep}

    else:
        _logger.info(f"\tno nonequilibrium detected.")
        htf = dict()
        hss = dict()
        _logger.info(f"\tcataloging HybridTopologyFactories...")

        for phase in phases:
            _logger.info(f"\t\tphase: {phase}:")
            #TODO write a SAMSFEP class that mirrors NonequilibriumSwitchingFEP
            _logger.info(
                f"\t\twriting HybridTopologyFactory for phase {phase}...")
            htf[phase] = HybridTopologyFactory(
                top_prop['%s_topology_proposal' % phase],
                top_prop['%s_old_positions' % phase],
                top_prop['%s_new_positions' % phase],
                neglected_new_angle_terms=top_prop[
                    f"{phase}_forward_neglected_angles"],
                neglected_old_angle_terms=top_prop[
                    f"{phase}_reverse_neglected_angles"],
                softcore_LJ_v2=setup_options['softcore_v2'],
                interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

        for phase in phases:
            # Define necessary vars to check energy bookkeeping
            _top_prop = top_prop['%s_topology_proposal' % phase]
            _htf = htf[phase]
            _forward_added_valence_energy = top_prop['%s_added_valence_energy'
                                                     % phase]
            _reverse_subtracted_valence_energy = top_prop[
                '%s_subtracted_valence_energy' % phase]

            if not use_given_geometries:
                zero_state_error, one_state_error = validate_endstate_energies(
                    _top_prop,
                    _htf,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD
                )  #, trajectory_directory=f'{xml_directory}{phase}')
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")
            else:
                _logger.info(
                    f"'use_given_geometries' was passed to setup; skipping endstate validation"
                )

            #TODO expose more of these options in input
            if build_samplers:

                n_states = setup_options['n_states']
                _logger.info(f"\tn_states: {n_states}")
                if 'n_replicas' not in setup_options:
                    n_replicas = n_states
                else:
                    n_replicas = setup_options['n_replicas']

                checkpoint_interval = setup_options['checkpoint_interval']

                # generating lambda protocol
                lambda_protocol = LambdaProtocol(
                    functions=setup_options['protocol-type'])
                _logger.info(
                    f'Using lambda protocol : {setup_options["protocol-type"]}'
                )

                if atom_selection:
                    selection_indices = htf[phase].hybrid_topology.select(
                        atom_selection)
                else:
                    selection_indices = None

                storage_name = str(trajectory_directory) + '/' + str(
                    trajectory_prefix) + '-' + str(phase) + '.nc'
                _logger.info(f'\tstorage_name: {storage_name}')
                _logger.info(f'\tselection_indices {selection_indices}')
                _logger.info(f'\tcheckpoint interval {checkpoint_interval}')
                reporter = MultiStateReporter(
                    storage_name,
                    analysis_particle_indices=selection_indices,
                    checkpoint_interval=checkpoint_interval)

                if phase == 'vacuum':
                    endstates = False
                else:
                    endstates = True

                if setup_options['fe_type'] == 'fah':
                    _logger.info('SETUP FOR FAH DONE')
                    return {
                        'topology_proposals': top_prop,
                        'hybrid_topology_factories': htf
                    }

                if setup_options['fe_type'] == 'sams':
                    hss[phase] = HybridSAMSSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'],
                        online_analysis_minimum_iterations=10,
                        flatness_criteria=setup_options['flatness-criteria'],
                        gamma0=setup_options['gamma0'])
                    hss[phase].setup(n_states=n_states,
                                     n_replicas=n_replicas,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
                elif setup_options['fe_type'] == 'repex':
                    hss[phase] = HybridRepexSampler(
                        mcmc_moves=mcmc.LangevinSplittingDynamicsMove(
                            timestep=timestep,
                            collision_rate=1.0 / unit.picosecond,
                            n_steps=n_steps_per_move_application,
                            reassign_velocities=False,
                            n_restart_attempts=20,
                            constraint_tolerance=1e-06),
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'])
                    hss[phase].setup(n_states=n_states,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
            else:
                _logger.info(f"omitting sampler construction")

            if serialize_systems:
                # save the systems and the states
                pass

                _logger.info('WRITING OUT XML FILES')
                #old_thermodynamic_state, new_thermodynamic_state, hybrid_thermodynamic_state, _ = generate_endpoint_thermodynamic_states(htf[phase].hybrid_system, _top_prop)

                xml_directory = f'{setup_options["trajectory_directory"]}/xml/'
                if not os.path.exists(xml_directory):
                    os.makedirs(xml_directory)
                from perses.utils import data
                _logger.info('WRITING OUT XML FILES')
                _logger.info(f'Saving the hybrid, old and new system to disk')
                data.serialize(
                    htf[phase].hybrid_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-hybrid-system.gz'
                )
                data.serialize(
                    htf[phase]._old_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-old-system.gz'
                )
                data.serialize(
                    htf[phase]._new_system,
                    f'{setup_options["trajectory_directory"]}/xml/{phase}-new-system.gz'
                )

        return {
            'topology_proposals': top_prop,
            'hybrid_topology_factories': htf,
            'hybrid_samplers': hss
        }
def run(yaml_filename=None):
    _logger.info("Beginning Setup...")
    if yaml_filename is None:
        try:
            yaml_filename = sys.argv[1]
            _logger.info(f"Detected yaml file: {yaml_filename}")
        except IndexError as e:
            _logger.critical(
                f"You must specify the setup yaml file as an argument to the script."
            )

    _logger.info(f"Getting setup options from {yaml_filename}")
    setup_options = getSetupOptions(yaml_filename)
    if 'lambdas' in setup_options:
        if type(setup_options['lambdas']) == int:
            lambdas = {}
            for _direction in setup_options['direction']:
                lims = (0, 1) if _direction == 'forward' else (1, 0)
                lambdas[_direction] = np.linspace(lims[0], lims[1],
                                                  setup_options['lambdas'])
        else:
            lambdas = setup_options['lambdas']
    else:
        lambdas = None

    if setup_options['run_type'] == 'anneal':
        _logger.info(f"skipping setup and annealing...")
        trajectory_prefix = setup_options['trajectory_prefix']
        trajectory_directory = setup_options['trajectory_directory']
        out_trajectory_prefix = setup_options['out_trajectory_prefix']
        for phase in setup_options['phases']:
            ne_fep_run = pickle.load(
                open(
                    os.path.join(
                        trajectory_directory,
                        "%s_%s_fep.eq.pkl" % (trajectory_prefix, phase)),
                    'rb'))
            #implement the appropriate parallelism, otherwise the default from the previous incarnation of the ne_fep_run will be used.
            if setup_options['LSF']:
                _internal_parallelism = {
                    'library': ('dask', 'LSF'),
                    'num_processes': setup_options['processes']
                }
            else:
                _internal_parallelism = None
            ne_fep_run.implement_parallelism(
                external_parallelism=None,
                internal_parallelism=_internal_parallelism)
            ne_fep_run.neq_integrator = setup_options['neq_integrator']
            ne_fep_run.LSF = setup_options['LSF']
            ne_fep_run.AIS(num_particles=setup_options['n_particles'],
                           protocols=lambdas,
                           num_integration_steps=setup_options[
                               'ncmc_num_integration_steps'],
                           return_timer=False,
                           rethermalize=setup_options['ncmc_rethermalize'])

            # try to write out the ne_fep object as a pickle
            try:
                with open(
                        os.path.join(
                            trajectory_directory, "%s_%s_fep.neq.pkl" %
                            (out_trajectory_prefix, phase)), 'wb') as f:
                    pickle.dump(ne_fep_run, f)
                    print("pickle save successful; terminating.")

            except Exception as e:
                print(e)
                print("Unable to save run object as a pickle; saving as npy")
                np.savez(
                    os.path.join(
                        trajectory_directory,
                        "%s_%s_fep.neq.npy" % (out_trajectory_prefix, phase)),
                    ne_fep_run)

    else:
        _logger.info(f"Running setup...")
        setup_dict = run_setup(setup_options)

        trajectory_prefix = setup_options['trajectory_prefix']
        trajectory_directory = setup_options['trajectory_directory']

        #write out topology proposals
        try:
            _logger.info(
                f"Writing topology proposal {trajectory_prefix}_topology_proposals.pkl to {trajectory_directory}..."
            )
            with open(
                    os.path.join(
                        trajectory_directory,
                        "%s_topology_proposals.pkl" % (trajectory_prefix)),
                    'wb') as f:
                pickle.dump(setup_dict['topology_proposals'], f)
        except Exception as e:
            print(e)
            _logger.info(
                "Unable to save run object as a pickle; saving as npy")
            np.savez(
                os.path.join(trajectory_directory,
                             "%s_topology_proposals.npy" %
                             (trajectory_prefix)),
                setup_dict['topology_proposals'])

        n_equilibration_iterations = setup_options[
            'n_equilibration_iterations']  #set this to 1 for neq_fep
        _logger.info(
            f"Equilibration iterations: {n_equilibration_iterations}.")

        if setup_options['fe_type'] == 'neq':
            temperature = setup_options['temperature'] * unit.kelvin
            max_file_size = setup_options['max_file_size']

            ne_fep = setup_dict['ne_fep']
            for phase in setup_options['phases']:
                ne_fep_run = ne_fep[phase]
                hybrid_factory = ne_fep_run.factory

                top_proposal = setup_dict['topology_proposals'][
                    f"{phase}_topology_proposal"]
                _forward_added_valence_energy = setup_dict[
                    'topology_proposals'][f"{phase}_added_valence_energy"]
                _reverse_subtracted_valence_energy = setup_dict[
                    'topology_proposals'][f"{phase}_subtracted_valence_energy"]

                zero_state_error, one_state_error = validate_endstate_energies(
                    hybrid_factory._topology_proposal,
                    hybrid_factory,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD,
                    trajectory_directory=
                    f'{setup_options["trajectory_directory"]}/xml/{phase}')
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")

                print("activating client...")
                processes = setup_options['processes']
                adapt = setup_options['adapt']
                LSF = setup_options['LSF']

                if setup_options['run_type'] == None or setup_options[
                        'run_type'] == 'equilibrate':
                    print("equilibrating...")
                    # Now we have to pull the files
                    if setup_options['direction'] == None:
                        endstates = [0, 1]
                    else:
                        endstates = [
                            0
                        ] if setup_options['direction'] == 'forward' else [1]
                    #ne_fep_run.activate_client(LSF = LSF, processes = 2, adapt = adapt) #we only need 2 processes for equilibration
                    ne_fep_run.minimize_sampler_states()
                    ne_fep_run.equilibrate(
                        n_equilibration_iterations=setup_options[
                            'n_equilibration_iterations'],
                        n_steps_per_equilibration=setup_options[
                            'n_equilibrium_steps_per_iteration'],
                        endstates=[0, 1],
                        max_size=setup_options['max_file_size'],
                        decorrelate=True,
                        timer=True,
                        minimize=False)
                    #ne_fep_run.deactivate_client()
                    with open(
                            os.path.join(
                                trajectory_directory, "%s_%s_fep.eq.pkl" %
                                (trajectory_prefix, phase)), 'wb') as f:
                        pickle.dump(ne_fep_run, f)

                if setup_options['run_type'] == None:
                    print("annealing...")
                    if 'lambdas' in setup_options:
                        if type(setup_options['lambdas']) == int:
                            lambdas = {}
                            for _direction in setup_options['direction']:
                                lims = (0,
                                        1) if _direction == 'forward' else (1,
                                                                            0)
                                lambdas[_direction] = np.linspace(
                                    lims[0], lims[1], setup_options['lambdas'])
                        else:
                            lambdas = setup_options['lambdas']
                    else:
                        lambdas = None
                    ne_fep_run = pickle.load(
                        open(
                            os.path.join(
                                trajectory_directory, "%s_%s_fep.eq.pkl" %
                                (trajectory_prefix, phase)), 'rb'))
                    ne_fep_run.AIS(
                        num_particles=setup_options['n_particles'],
                        protocols=lambdas,
                        num_integration_steps=setup_options[
                            'ncmc_num_integration_steps'],
                        return_timer=False,
                        rethermalize=setup_options['ncmc_rethermalize'])

                    print("calculation complete; deactivating client")
                    #ne_fep_run.deactivate_client()

                    # try to write out the ne_fep object as a pickle
                    try:
                        with open(
                                os.path.join(
                                    trajectory_directory, "%s_%s_fep.neq.pkl" %
                                    (trajectory_prefix, phase)), 'wb') as f:
                            pickle.dump(ne_fep_run, f)
                            print("pickle save successful; terminating.")

                    except Exception as e:
                        print(e)
                        print(
                            "Unable to save run object as a pickle; saving as npy"
                        )
                        np.savez(
                            os.path.join(
                                trajectory_directory, "%s_%s_fep.neq.npy" %
                                (trajectory_prefix, phase)), ne_fep_run)

        elif setup_options['fe_type'] == 'sams':
            _logger.info(f"Detecting sams as fe_type...")
            _logger.info(
                f"Writing hybrid factory {trajectory_prefix}hybrid_factory.npy to {trajectory_directory}..."
            )
            np.savez(
                os.path.join(trajectory_directory,
                             trajectory_prefix + "hybrid_factory.npy"),
                setup_dict['hybrid_topology_factories'])

            hss = setup_dict['hybrid_samplers']
            logZ = dict()
            free_energies = dict()
            _logger.info(f"Iterating through phases for sams...")
            for phase in setup_options['phases']:
                _logger.info(f'\tRunning {phase} phase...')
                hss_run = hss[phase]

                _logger.info(f"\t\tequilibrating...\n\n")
                hss_run.equilibrate(n_equilibration_iterations)
                _logger.info(f"\n\n")

                _logger.info(f"\t\textending simulation...\n\n")
                hss_run.extend(setup_options['n_cycles'])
                _logger.info(f"\n\n")

                logZ[phase] = hss_run._logZ[-1] - hss_run._logZ[0]
                free_energies[phase] = hss_run._last_mbar_f_k[
                    -1] - hss_run._last_mbar_f_k[0]
                _logger.info(f"\t\tFinished phase {phase}")

            for phase in free_energies:
                print(
                    f"Comparing ligand {setup_options['old_ligand_index']} to {setup_options['new_ligand_index']}"
                )
                print(
                    f"{phase} phase has a free energy of {free_energies[phase]}"
                )

        elif setup_options['fe_type'] == 'repex':
            _logger.info(f"Detecting repex as fe_type...")
            _logger.info(
                f"Writing hybrid factory {trajectory_prefix}hybrid_factory.npy to {trajectory_directory}..."
            )
            np.savez(
                os.path.join(trajectory_directory,
                             trajectory_prefix + "hybrid_factory.npy"),
                setup_dict['hybrid_topology_factories'])

            hss = setup_dict['hybrid_samplers']
            _logger.info(f"Iterating through phases for repex...")
            for phase in setup_options['phases']:
                print(f'Running {phase} phase')
                hss_run = hss[phase]

                _logger.info(f"\t\tequilibrating...\n\n")
                hss_run.equilibrate(n_equilibration_iterations)
                _logger.info(f"\n\n")

                _logger.info(f"\t\textending simulation...\n\n")
                hss_run.extend(setup_options['n_cycles'])
                _logger.info(f"\n\n")

                _logger.info(f"\t\tFinished phase {phase}")
Esempio n. 3
0
    def __init__(self,
                 protein_filename,
                 mutation_chain_id,
                 mutation_residue_id,
                 proposed_residue,
                 phase='complex',
                 conduct_endstate_validation=True,
                 ligand_input=None,
                 ligand_index=0,
                 water_model='tip3p',
                 ionic_strength=0.15 * unit.molar,
                 forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                 barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                 forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                 periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
                 nonperiodic_forcefield_kwargs=None,
                 small_molecule_forcefields='gaff-2.11',
                 complex_box_dimensions=None,
                 apo_box_dimensions=None,
                 flatten_torsions=False,
                 flatten_exceptions=False,
                 repartitioned_endstate=None,
                 **kwargs):
        """
        arguments
            protein_filename : str
                path to protein (to mutate); .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the HybridTopologyFactory. If using the RepartitionedHybridTopologyFactory,
                endstate validation cannot and will not be conducted.
            ligand_file : str, default None
                path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb
            ligand_index : int, default 0
                which ligand to use
            water_model : str, default 'tip3p'
                solvent model to use for solvation
            ionic_strength : float * unit.molar, default 0.15 * unit.molar
                the total concentration of ions (both positive and negative) to add using Modeller.
                This does not include ions that are added to neutralize the system.
                Note that only monovalent ions are currently supported.
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
                periodic forcefield kwargs for system parametrization
            nonperiodic_forcefield_kwargs : dict, default None
                non-periodic forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            complex_box_dimensions : Vec3, default None
                define box dimensions of complex phase;
                if None, padding is 1nm
            apo_box_dimensions :  Vec3, default None
                define box dimensions of apo phase phase;
                if None, padding is 1nm
            flatten_torsions : bool, default False
                in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms are lambda = 1
            flatten_exceptions : bool, default False
                in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
            repartitioned_endstate : int, default None
                the endstate (0 or 1) at which to build the RepartitionedHybridTopologyFactory. By default, this is None,
                meaning a vanilla HybridTopologyFactory will be built.
        TODO : allow argument for spectator ligands besides the 'ligand_file'

        """

        # First thing to do is load the apo protein to mutate...
        protein_pdbfile = open(protein_filename, 'r')
        protein_pdb = app.PDBFile(protein_pdbfile)
        protein_pdbfile.close()
        protein_positions, protein_topology, protein_md_topology = protein_pdb.positions, protein_pdb.topology, md.Topology.from_openmm(protein_pdb.topology)
        protein_topology = protein_md_topology.to_openmm()
        protein_n_atoms = protein_md_topology.n_atoms

        # Load the ligand, if present
        molecules = []
        if ligand_input:
            if isinstance(ligand_input, str):
                if ligand_input.endswith('.sdf'): # small molecule
                        ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index)
                        molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
                        ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_mol),  forcefield_generators.generateTopologyFromOEMol(ligand_mol)
                        ligand_md_topology = md.Topology.from_openmm(ligand_topology)
                        ligand_n_atoms = ligand_md_topology.n_atoms

                if ligand_input.endswith('pdb'): # protein
                    ligand_pdbfile = open(ligand_input, 'r')
                    ligand_pdb = app.PDBFile(ligand_pdbfile)
                    ligand_pdbfile.close()
                    ligand_positions, ligand_topology, ligand_md_topology = ligand_pdb.positions, ligand_pdb.topology, md.Topology.from_openmm(
                        ligand_pdb.topology)
                    ligand_n_atoms = ligand_md_topology.n_atoms

            elif isinstance(ligand_input, oechem.OEMol): # oemol object
                molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False))
                ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_input),  forcefield_generators.generateTopologyFromOEMol(ligand_input)
                ligand_md_topology = md.Topology.from_openmm(ligand_topology)
                ligand_n_atoms = ligand_md_topology.n_atoms

            else:
                _logger.warning(f'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
                return

            # Now create a complex
            complex_md_topology = protein_md_topology.join(ligand_md_topology)
            complex_topology = complex_md_topology.to_openmm()
            complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
            complex_positions[:protein_n_atoms, :] = protein_positions
            complex_positions[protein_n_atoms:, :] = ligand_positions

        # Now for a system_generator
        self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                                barostat=barostat,
                                                forcefield_kwargs=forcefield_kwargs,
                                                periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                                small_molecule_forcefield=small_molecule_forcefields,
                                                molecules=molecules,
                                                cache=None)

        # Solvate apo and complex...
        apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
        inputs = [apo_input]
        if ligand_input:
            inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))

        geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                                use_sterics=False,
                                                n_bond_divisions=100,
                                                n_angle_divisions=180,
                                                n_torsion_divisions=360,
                                                verbose=True,
                                                storage=None,
                                                bond_softening_constant=1.0,
                                                angle_softening_constant=1.0,
                                                neglect_angles = False,
                                                use_14_nonbondeds = True)


        # Run pipeline...
        htfs = []
        for (top, pos, sys) in inputs:
            point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                                 system_generator=self.system_generator,
                                                                 chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable)
                                                                 max_point_mutants=1,
                                                                 residues_allowed_to_mutate=[mutation_residue_id], # The residue ids allowed to mutate
                                                                 allowed_mutations=[(mutation_residue_id, proposed_residue)], # The residue ids allowed to mutate with the three-letter code allowed to change
                                                                 aggregate=True) # Always allow aggregation

            topology_proposal = point_mutation_engine.propose(sys, top)

            # Only validate energy bookkeeping if the WT and proposed residues do not involve rings
            old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
            validate_bool = False if old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids else True
            new_positions, logp_proposal = geometry_engine.propose(topology_proposal, pos, beta,
                                                                   validate_energy_bookkeeping=validate_bool)
            logp_reverse = geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta,
                                                        validate_energy_bookkeeping=validate_bool)

            if repartitioned_endstate is None:
                factory = HybridTopologyFactory
            elif repartitioned_endstate in [0, 1]:
                factory = RepartitionedHybridTopologyFactory

            forward_htf = factory(topology_proposal=topology_proposal,
                                  current_positions=pos,
                                  new_positions=new_positions,
                                  use_dispersion_correction=False,
                                  functions=None,
                                  softcore_alpha=None,
                                  bond_softening_constant=1.0,
                                  angle_softening_constant=1.0,
                                  soften_only_new=False,
                                  neglected_new_angle_terms=[],
                                  neglected_old_angle_terms=[],
                                  softcore_LJ_v2=True,
                                  softcore_electrostatics=True,
                                  softcore_LJ_v2_alpha=0.85,
                                  softcore_electrostatics_alpha=0.3,
                                  softcore_sigma_Q=1.0,
                                  interpolate_old_and_new_14s=flatten_exceptions,
                                  omitted_terms=None,
                                  endstate=repartitioned_endstate,
                                  flatten_torsions=flatten_torsions)

            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential


            if conduct_endstate_validation and repartitioned_endstate is None:
                zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal, forward_htf, added_valence_energy, subtracted_valence_energy, beta=beta, ENERGY_THRESHOLD=ENERGY_THRESHOLD)
                if zero_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}")
                if one_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}")
            else:
                pass

            htfs.append(forward_htf)

        self.apo_htf = htfs[0]
        self.complex_htf = htfs[1] if ligand_input else None
Esempio n. 4
0
def generate_dipeptide_top_pos_sys(topology,
                                   new_res,
                                   system,
                                   positions,
                                   system_generator,
                                   conduct_geometry_prop=True,
                                   conduct_htf_prop=False):
    """generate point mutation engine, geometry_engine, and conduct topology proposal, geometry propsal, and hybrid factory generation"""
    from perses.tests.utils import validate_endstate_energies
    if conduct_htf_prop:
        assert conduct_geometry_prop, f"the htf prop can only be conducted if there is a geometry proposal"
    #create the point mutation engine
    from perses.rjmc.topology_proposal import PointMutationEngine
    point_mutation_engine = PointMutationEngine(
        wildtype_topology=topology,
        system_generator=system_generator,
        chain_id=
        '1',  #denote the chain id allowed to mutate (it's always a string variable)
        max_point_mutants=1,
        residues_allowed_to_mutate=['2'],  #the residue ids allowed to mutate
        allowed_mutations=[
            ('2', new_res)
        ],  #the residue ids allowed to mutate with the three-letter code allowed to change
        aggregate=True)  #always allow aggregation

    #create a top proposal
    print(f"making topology proposal")
    topology_proposal = point_mutation_engine.propose(
        current_system=system, current_topology=topology)

    if not conduct_geometry_prop:
        return topology_proposal

    if conduct_geometry_prop:
        #create a geometry engine
        print(f"generating geometry engine")
        from perses.rjmc.geometry import FFAllAngleGeometryEngine
        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=100,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False,
            use_14_nonbondeds=True)

        #make a geometry proposal forward
        print(
            f"making geometry proposal from {list(topology.residues())[1].name} to {new_res}"
        )
        forward_new_positions, logp_proposal = geometry_engine.propose(
            topology_proposal, positions, beta)
        logp_reverse = geometry_engine.logp_reverse(topology_proposal,
                                                    forward_new_positions,
                                                    positions, beta)

    if not conduct_htf_prop:
        return (topology_proposal, forward_new_positions, logp_proposal,
                logp_reverse)

    if conduct_htf_prop:
        #create a hybrid topology factory
        from perses.annihilation.relative import HybridTopologyFactory
        forward_htf = HybridTopologyFactory(
            topology_proposal=topology_proposal,
            current_positions=positions,
            new_positions=forward_new_positions,
            use_dispersion_correction=False,
            functions=None,
            softcore_alpha=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            soften_only_new=False,
            neglected_new_angle_terms=[],
            neglected_old_angle_terms=[],
            softcore_LJ_v2=True,
            softcore_electrostatics=True,
            softcore_LJ_v2_alpha=0.85,
            softcore_electrostatics_alpha=0.3,
            softcore_sigma_Q=1.0,
            interpolate_old_and_new_14s=False,
            omitted_terms=None)

        if not topology_proposal.unique_new_atoms:
            assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
            assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
            vacuum_added_valence_energy = 0.0
        else:
            added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

        if not topology_proposal.unique_old_atoms:
            assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
            assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
            subtracted_valence_energy = 0.0
        else:
            subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

        zero_state_error, one_state_error = validate_endstate_energies(
            forward_htf._topology_proposal,
            forward_htf,
            added_valence_energy,
            subtracted_valence_energy,
            beta=1.0 / (kB * temperature),
            ENERGY_THRESHOLD=ENERGY_THRESHOLD,
            platform=openmm.Platform.getPlatformByName('Reference'))
        print(f"zero state error : {zero_state_error}")
        print(f"one state error : {one_state_error}")

        return forward_htf
Esempio n. 5
0
def compare_energies(mol_name="naphthalene",
                     ref_mol_name="benzene",
                     atom_expression=['Hybridization'],
                     bond_expression=['Hybridization']):
    """
    Make an atom map where the molecule at either lambda endpoint is identical, and check that the energies are also the same.
    """
    from openmmtools.constants import kB
    from openmmtools import alchemy, states
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies
    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr, bond_expr = generate_expression(
        atom_expression), generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    from openforcefield.topology import Molecule
    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(forcefields=forcefield_files,
                                       barostat=barostat,
                                       forcefield_kwargs=forcefield_kwargs,
                                       small_molecule_forcefield='gaff-2.11',
                                       molecules=molecules,
                                       cache=None)

    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol],
                                                     system_generator)
    proposal = proposal_engine.propose(system,
                                       topology,
                                       atom_expr=atom_expr,
                                       bond_expr=bond_expr)
    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(
        proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta)
    #make a topology proposal with the appropriate data:

    factory = HybridTopologyFactory(proposal, positions, new_positions)
    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
        vacuum_added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    zero_state_error, one_state_error = validate_endstate_energies(
        factory._topology_proposal,
        factory,
        added_valence_energy,
        subtracted_valence_energy,
        beta=1.0 / (kB * temperature),
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))
    return factory
    def __init__(
            self,
            receptor_filename,
            ligand_filename,
            mutation_chain_id,
            mutation_residue_id,
            proposed_residue,
            phase='complex',
            conduct_endstate_validation=False,
            ligand_index=0,
            forcefield_files=[
                'amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'
            ],
            barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                               temperature, 50),
            forcefield_kwargs={
                'removeCMMotion': False,
                'ewaldErrorTolerance': 1e-4,
                'nonbondedMethod': app.PME,
                'constraints': app.HBonds,
                'hydrogenMass': 4 * unit.amus
            },
            small_molecule_forcefields='gaff-2.11',
            **kwargs):
        """
        arguments
            receptor_filename : str
                path to receptor; .pdb
            ligand_filename : str
                path to ligand of interest; .sdf or .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the hybrid topology factory
            ligand_index : int, default 0
                which ligand to use
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.NoCutoff, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization

        TODO : allow argument for separate apo structure if it exists separately
               allow argument for specator ligands besides the 'ligand_filename'
        """
        from openforcefield.topology import Molecule
        from openmmforcefields.generators import SystemGenerator

        # first thing to do is make a complex and apo...
        pdbfile = open(receptor_filename, 'r')
        pdb = app.PDBFile(pdbfile)
        pdbfile.close()
        receptor_positions, receptor_topology, receptor_md_topology = pdb.positions, pdb.topology, md.Topology.from_openmm(
            pdb.topology)
        receptor_topology = receptor_md_topology.to_openmm()
        receptor_n_atoms = receptor_md_topology.n_atoms

        molecules = []
        ligand_mol = createOEMolFromSDF(ligand_filename, index=ligand_index)
        ligand_mol = generate_unique_atom_names(ligand_mol)
        molecules.append(
            Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
        ligand_positions, ligand_topology = extractPositionsFromOEMol(
            ligand_mol), forcefield_generators.generateTopologyFromOEMol(
                ligand_mol)
        ligand_md_topology = md.Topology.from_openmm(ligand_topology)
        ligand_n_atoms = ligand_md_topology.n_atoms

        #now create a complex
        complex_md_topology = receptor_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros(
            [receptor_n_atoms + ligand_n_atoms, 3]),
                                          unit=unit.nanometers)
        complex_positions[:receptor_n_atoms, :] = receptor_positions
        complex_positions[receptor_n_atoms:, :] = ligand_positions

        #now for a system_generator
        self.system_generator = SystemGenerator(
            forcefields=forcefield_files,
            barostat=barostat,
            forcefield_kwargs=forcefield_kwargs,
            small_molecule_forcefield=small_molecule_forcefields,
            molecules=molecules,
            cache=None)

        #create complex and apo inputs...
        complex_topology, complex_positions, complex_system = self._solvate(
            complex_topology, complex_positions, 'tip3p', phase=phase)
        apo_topology, apo_positions, apo_system = self._solvate(
            receptor_topology, receptor_positions, 'tip3p', phase='phase')

        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=100,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False,
            use_14_nonbondeds=True)

        #run pipeline...
        htfs = []
        for (top, pos, sys) in zip([complex_topology, apo_topology],
                                   [complex_positions, apo_positions],
                                   [complex_system, apo_system]):
            point_mutation_engine = PointMutationEngine(
                wildtype_topology=top,
                system_generator=self.system_generator,
                chain_id=
                mutation_chain_id,  #denote the chain id allowed to mutate (it's always a string variable)
                max_point_mutants=1,
                residues_allowed_to_mutate=[
                    mutation_residue_id
                ],  #the residue ids allowed to mutate
                allowed_mutations=[
                    (mutation_residue_id, proposed_residue)
                ],  #the residue ids allowed to mutate with the three-letter code allowed to change
                aggregate=True)  #always allow aggregation

            topology_proposal = point_mutation_engine.propose(sys, top)

            new_positions, logp_proposal = geometry_engine.propose(
                topology_proposal, pos, beta)
            logp_reverse = geometry_engine.logp_reverse(
                topology_proposal, new_positions, pos, beta)

            forward_htf = HybridTopologyFactory(
                topology_proposal=topology_proposal,
                current_positions=pos,
                new_positions=new_positions,
                use_dispersion_correction=False,
                functions=None,
                softcore_alpha=None,
                bond_softening_constant=1.0,
                angle_softening_constant=1.0,
                soften_only_new=False,
                neglected_new_angle_terms=[],
                neglected_old_angle_terms=[],
                softcore_LJ_v2=True,
                softcore_electrostatics=True,
                softcore_LJ_v2_alpha=0.85,
                softcore_electrostatics_alpha=0.3,
                softcore_sigma_Q=1.0,
                interpolate_old_and_new_14s=False,
                omitted_terms=None)

            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})"
                vacuum_added_valence_energy = 0.0
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

            if conduct_endstate_validation:
                zero_state_error, one_state_error = validate_endstate_energies(
                    forward_htf._topology_proposal,
                    forward_htf,
                    added_valence_energy,
                    subtracted_valence_energy,
                    beta=beta,
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD)
            else:
                pass

            htfs.append(forward_htf)

        self.complex_htf = htfs[0]
        self.apo_htf = htfs[1]
            ne_fep = setup_dict['ne_fep']
            for phase in setup_options['phases']:
                ne_fep_run = ne_fep[phase]
                hybrid_factory = ne_fep_run.factory

                top_proposal = setup_dict['topology_proposals'][
                    f"{phase}_topology_proposal"]
                _forward_added_valence_energy = setup_dict[
                    'topology_proposals'][f"{phase}_added_valence_energy"]
                _reverse_subtracted_valence_energy = setup_dict[
                    'topology_proposals'][f"{phase}_subtracted_valence_energy"]

                zero_state_error, one_state_error = validate_endstate_energies(
                    hybrid_factory._topology_proposal,
                    hybrid_factory,
                    _forward_added_valence_energy,
                    _reverse_subtracted_valence_energy,
                    beta=1.0 / (kB * temperature),
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD)
                _logger.info(f"\t\terror in zero state: {zero_state_error}")
                _logger.info(f"\t\terror in one state: {one_state_error}")

                print("activating client...")
                processes = setup_options['processes']
                adapt = setup_options['adapt']
                LSF = setup_options['LSF']

                if setup_options['run_type'] == None or setup_options[
                        'run_type'] == 'equilibrate':
                    print("equilibrating...")
                    # Now we have to pull the files