def test_raises_os_error(self, pt_blank_dir, grompp):
    """Each bad structure/topology input should make compile_tprs raise OSError."""
    from paratemp.sim_setup import compile_tprs
    topo_dir = pt_blank_dir.joinpath('TOPO')
    topo_dir.mkdir()
    # Keyword arguments shared by all three failing calls.
    shared_kwargs = dict(start_temp=298, scaling_exponent=0.025, number=2,
                         template='../' + n_template, base_name='nvt',
                         grompp_exe=grompp)
    with cd(topo_dir):
        with pytest.raises(OSError,
                           match='Incorrect number of structure files found'):
            compile_tprs(multi_structure=True, structure='../',
                         **shared_kwargs)
        with pytest.raises(OSError, match='No structure file found'):
            compile_tprs(structure='../not-here.gro', **shared_kwargs)
        with pytest.raises(OSError, match='No topology file found'):
            compile_tprs(structure='../' + n_gro,
                         topology='../not-here.top', **shared_kwargs)
def get_n_solvent(folder, solvent='DCM'):
    """
    Find the number of solvent molecules of given type in topology file.

    Note, this function is being deprecated in favor of the more general
    :func:`get_solv_count_top`, which takes the strengths of this function
    while also allowing for specification of an exact top file.

    :param str folder: The folder in which to look for a file ending in
        '.top'.
    :param str solvent: Default: 'DCM'
    :return: The number of solvent molecules.
    :rtype: int
    :raises ValueError: if the folder does not contain exactly one '.top'
        file, or if no count for the solvent can be found in it.
    """
    warnings.warn(
        'This function is deprecated. Please use '
        'get_solv_count_top', DeprecationWarning)
    # Matches lines such as '  DCM   1234' and captures the trailing count.
    solv_count_re = re.compile(r'(?:^\s*{}\s+)(\d+)'.format(solvent))
    with cd(folder):
        top_files = glob.glob('*.top')
        if len(top_files) != 1:
            raise ValueError('Found {} .top files in {}\nOnly can deal with '
                             '1'.format(len(top_files), folder))
        with open(top_files[0], 'r') as top_handle:
            for top_line in top_handle:
                solv_match = solv_count_re.search(top_line)
                if solv_match:
                    return int(solv_match.group(1))
    # Not the right error, but fine for now
    raise ValueError("Didn't find n_solv in {}".format(folder))
def replica_temp_path(pt_run_dir: pathlib.PosixPath):
    """Fixture: run make_indices in the run dir and return replica_temp.xvg."""
    # Doesn't currently test:
    #  content of the outputs
    #  what happens if they already exist
    from paratemp.energy_histo import make_indices
    with cd(pt_run_dir):
        make_indices('PT-out0.log')
    return pt_run_dir.joinpath('replica_temp.xvg')
def test_raises(self, pt_run_dir):
    """An extra trr without a matching tpr should trigger a ValueError."""
    from paratemp.sim_setup import get_gro_files
    with cd(pt_run_dir):
        pathlib.Path('PT-out2.trr').touch()
        with pytest.raises(ValueError):
            get_gro_files(trr_base='PT-out', tpr_base='TOPO/nvt', time=2)
def test_get_gro_files(self, pt_run_dir):
    """get_gro_files should return one gro name per replica."""
    from paratemp.sim_setup import get_gro_files
    with cd(pt_run_dir):
        gro_names = get_gro_files(trr_base='PT-out', tpr_base='TOPO/nvt',
                                  time=2)
        assert len(gro_names) == 2
        assert gro_names == ['PT-out0.gro', 'PT-out1.gro']
def sim_with_tpr(self, sim_with_dir):
    """Fixture: compile the tpr for the 'minimize' step inside its own dir."""
    simulation, base_path = sim_with_dir
    step_name = 'minimize'
    step_path = base_path / step_name
    step_path.mkdir()
    with cd(step_path):
        simulation._compile_tpr(step_name=step_name)
    return simulation, step_path, step_name
def test_get_energies(pt_run_dir):
    """get_energies should return a two-replica MultiIndex DataFrame."""
    # Doesn't currently test:
    #  content of the outputs
    from paratemp.energy_bin_analysis import get_energies
    with cd(pt_run_dir):
        energies = get_energies('PT-out')
        assert len(energies.index.levels[0]) == 2
        assert isinstance(energies, pd.DataFrame)
        assert isinstance(energies.index, pd.MultiIndex)
def test_import_energies(self, pt_run_dir: pathlib.PosixPath):
    """import_energies should give one ndarray per energy xvg found."""
    from paratemp.energy_histo import import_energies, find_energies
    with cd(pt_run_dir):
        xvg_names = find_energies()
        arrays = import_energies(xvg_names)
        assert isinstance(arrays, list)
        assert all(isinstance(arr, np.ndarray) for arr in arrays)
        assert len(arrays) == 2
def test_make_indices(pt_run_dir: pathlib.PosixPath):
    """make_indices should write the replica index/temperature outputs."""
    # Doesn't currently test:
    #  content of the outputs
    #  what happens if they already exist
    from paratemp.energy_histo import make_indices
    with cd(pt_run_dir):
        make_indices('PT-out0.log')
    for out_name in ('replica_temp.xvg', 'replica_index.xvg', 'demux.pl.log'):
        assert pt_run_dir.joinpath(out_name).exists()
def test_next_folder_index(self, sim):
    """Exercise _next_folder_index against various folder/file names."""
    # Empty base folder: first index is 1.
    assert sim._next_folder_index == 1
    path = sim.base_folder
    with cd(path):
        path.joinpath('01-minimize-phen').mkdir()
        path.joinpath('02-equil-phen').mkdir()
    assert sim._next_folder_index == 3
    with cd(path):
        path.joinpath('05-step-mol').mkdir()
    assert sim._next_folder_index == 6
    with cd(path):
        # Three-digit prefix: presumably outside the two-digit pattern,
        # so it is not counted -- TODO confirm
        path.joinpath('100-step-mol').mkdir()
    assert sim._next_folder_index == 6
    with cd(path):
        # A plain file rather than a directory: not counted.
        path.joinpath('06-step-mol').touch()
    assert sim._next_folder_index == 6
    with cd(path):
        # NOTE(review): apparently this name is deliberately ignored too;
        # verify against the _next_folder_index implementation.
        path.joinpath('07-dont_work_none').mkdir()
    assert sim._next_folder_index == 6
def test_find_energies(pt_run_dir: pathlib.PosixPath):
    """find_energies should report two existing energy[01].xvg files."""
    # Doesn't currently test:
    #  content of the outputs
    #  what happens if they already exist
    from paratemp.energy_histo import find_energies
    with cd(pt_run_dir):
        found = find_energies()
        for xvg_name in found:
            assert pt_run_dir.joinpath(xvg_name).exists()
            assert re.match(r'energy[01].xvg', xvg_name)
        assert len(found) == 2
def test_raises_runtime_error(self, pt_blank_dir, grompp):
    """A glob matching a topology as the structure should fail compilation."""
    from paratemp.sim_setup import compile_tprs
    topo_dir = pt_blank_dir.joinpath('TOPO')
    topo_dir.mkdir()
    with cd(topo_dir):
        with pytest.raises(RuntimeError):
            compile_tprs(start_temp=298,
                         scaling_exponent=0.025,
                         number=2,
                         template='../' + n_template,
                         structure='../*top',
                         base_name='nvt',
                         grompp_exe=grompp)
def test_run_mdrun(self, sim_with_tpr):
    """_run_mdrun should return an absolute .gro path and record outputs."""
    simulation, run_path, step = sim_with_tpr
    with cd(run_path):
        gro = simulation._run_mdrun(step_name=step)
        assert isinstance(gro, pathlib.Path)
        assert gro.exists()
        assert gro.is_absolute()
        assert gro.is_file()
        assert gro.suffix == '.gro'
        # The same geometry should be recorded on the Simulation object.
        assert gro.samefile(simulation.last_geometry)
        assert gro.samefile(simulation.geometries[step])
        assert isinstance(simulation.deffnms[step], pathlib.Path)
        assert isinstance(simulation.outputs['run_{}'.format(step)], str)
def test_compile_tpr(self, sim_with_dir):
    """_compile_tpr should write a tpr (plus mdout.mdp) and record outputs."""
    simulation, base_path = sim_with_dir
    step = 'minimize'
    step_path = base_path / step
    step_path.mkdir()
    with cd(step_path):
        tpr = simulation._compile_tpr(step_name=step)
        # mdout.mdp is expected in the current directory after compiling.
        mdout = pathlib.Path('mdout.mdp').resolve()
        assert isinstance(tpr, pathlib.Path)
        assert tpr.exists()
        assert tpr.is_absolute()
        assert tpr.is_file()
        assert tpr.suffix == '.tpr'
        assert mdout.exists()
        assert tpr.samefile(simulation.tprs[step])
        assert isinstance(simulation.outputs['compile_{}'.format(step)], str)
def test_multi_structure(self, pt_blank_dir, grompp):
    """compile_tprs with multi_structure should produce one tpr per replica."""
    from paratemp.sim_setup import compile_tprs
    from paratemp.tools import get_temperatures
    topo_dir = pt_blank_dir.joinpath('TOPO')
    topo_dir.mkdir()
    n_reps = 2
    with cd(topo_dir):
        compile_tprs(start_temp=298, scaling_exponent=0.025, number=n_reps,
                     template='../' + n_template, multi_structure=True,
                     structure='../PT-out', base_name='nvt',
                     grompp_exe=grompp)
    assert topo_dir.exists()
    for rep in range(n_reps):
        assert topo_dir.joinpath('nvt{}.tpr'.format(rep)).exists()
    temperatures = get_temperatures(
        str(topo_dir.joinpath('temperatures.dat')))
    assert temperatures.shape == (2,)
def test_warns(self, pt_blank_dir, grompp):
    """compile_tprs warns (but still succeeds) when several gro files match."""
    from paratemp.sim_setup import compile_tprs
    from paratemp.tools import get_temperatures
    topo_dir = pt_blank_dir.joinpath('TOPO')
    topo_dir.mkdir()
    n_reps = 2
    with cd(topo_dir):
        with pytest.warns(UserWarning, match=r'Found \d+ structure files'):
            compile_tprs(start_temp=298, scaling_exponent=0.025,
                         number=n_reps, template='../' + n_template,
                         structure='../*.gro', base_name='nvt',
                         grompp_exe=grompp)
    assert topo_dir.exists()
    for rep in range(n_reps):
        assert topo_dir.joinpath('nvt{}.tpr'.format(rep)).exists()
    temperatures = get_temperatures(
        str(topo_dir.joinpath('temperatures.dat')))
    assert temperatures.shape == (2, )
def _get_n_top(n_top, folder): """ Get path and name of topology file :param str n_top: None or path and file name of topology file. :param str folder: None or folder containing one topology file. :return: path to the topology file :rtype: str :raises ValueError: This is raised if more than one topology is found in the given folder. """ if n_top is None: if folder is None: raise InputError('None', 'Either folder or n_top must be ' 'specified') with cd(folder): n_top = glob.glob('*.top') if len(n_top) != 1: raise ValueError( 'Found {} .top files in {}\n'.format(len(n_top), folder) + 'Only can deal with 1') else: n_top = os.path.abspath(n_top[0]) return n_top
def extend_tprs(base_name, time, working_dir=None, sub_script=None,
                submit=False, extend_infix='-extend', first_extension=True,
                cpt_base='npt', verbose=True, log='extend-tprs.log'):
    """
    Extend a set of tpr files

    :param str base_name: Base of the tpr files. This should return the
        file names when globbed with '*.tpr' appended to this base name.
        Also, this will cause issues when adding the infix if the file name
        doesn't fit the pattern of '{base_name}{number}.tpr'.
    :param time: Amount of time in picoseconds by which to extend the job.
        This will be cast to a string, so an int, string, or float should
        be fine.
    :type time: str or int or float
    :param str working_dir: Default: None. If given, this directory will be
        changed into and work will continue there. If working_dir is None,
        the working dir will be taken to be the directory one directory
        above the location given in base_name.
    :param str sub_script: Default: None. Name of the submission script. If
        given, the script will be edited to match the new name of the
        extended tpr files. sub_script can be given as an absolute path or
        relative to current directory (first priority) or relative to
        working_dir (checked second).
    :param bool submit: Default: False. If true, the job will be submitted
        to the queuing system.
    :param str extend_infix: Default: '-extend'. str to put into the name
        of the extended tpr files after the base_name and before the
        '[number].tpr'.
    :param bool first_extension: Default: True. If True, '-cpi {checkpoint
        base name}' will be added to the submission script so that it
        becomes a run continuation.
    :param str cpt_base: Default: 'npt'. The first part of the name of the
        checkpoint files that will end in '{number}.cpt'. The full
        checkpoint base_name will be found using
        :func:`~paratemp.para_temp_setup._find_cpt_base`.
    :param bool verbose: Default: True. If True, a lot more status
        information will be printed.
    :param str log: Default: 'extend-tprs.log'. Name of file to which to
        log information on this process and output from GROMACS tools.
    :return: None
    """
    # Split the absolute base name into the directory holding the tpr
    # files and the file-name prefix used for globbing below.
    _tpr_dir, _rel_base_name = os.path.split(os.path.abspath(base_name))
    if working_dir is None:
        # Default working dir: one level above the tpr directory.
        _working_dir = os.path.abspath(_tpr_dir+'/../')
    else:
        _working_dir = working_dir
    if sub_script is not None:
        # Look for the submission script relative to the current directory
        # first, then relative to the working directory.
        second_poss = os.path.abspath(os.path.join(_working_dir, sub_script))
        if os.path.isfile(sub_script):
            _sub_script = os.path.abspath(sub_script)
        elif os.path.isfile(second_poss):
            _sub_script = second_poss
        else:
            raise OSError(errno.ENOENT,
                          'Submit script not found relative to '
                          'here or working_dir.')
    else:
        _sub_script = None  # Only needed so the IDE stops bothering me
    with cd(_working_dir), open(log, 'a') as _log:
        if float(time):
            # Non-zero extension time: extend every matching tpr file.
            _time = str(time)
            # Splits '{base}{number}.tpr' so the infix can go in between.
            re_split_name = re.compile(r'({})(\d+\.tpr)'.format(
                _rel_base_name))
            with cd(_tpr_dir):
                tpr_names = glob.glob(_rel_base_name+'*.tpr')
                if len(tpr_names) < 1:
                    raise InputError(base_name, 'no files found for {}'.format(
                        base_name+'*.tpr'))
                if verbose:
                    print('Extending {} tpr files'.format(len(tpr_names)))
                for tpr_name in tpr_names:
                    tpr_groups = re_split_name.match(tpr_name)
                    new_tpr_name = (tpr_groups.group(1) + extend_infix +
                                    tpr_groups.group(2))
                    _extend_tpr(tpr_name, new_tpr_name, _time, _log)
                if verbose:
                    print(' '*4 + 'Done extending tpr files.')
            extended = True
        else:
            # float(time) is falsy (zero): nothing to add.
            extended = False
            if verbose:
                print('tpr files not extended (no time to be added)')
        if sub_script is not None:
            _sub_script = os.path.relpath(_sub_script)
            if extended:
                if verbose:
                    print('Editing {} for new tpr names '
                          'with {}'.format(_sub_script, extend_infix))
                # Point the submission script at the renamed (extended) tprs.
                _replace_string_in_file(_rel_base_name + ' ',
                                        _rel_base_name + extend_infix + ' ',
                                        _sub_script, _log)
                if first_extension:
                    # Reference the checkpoint files so the run continues
                    # rather than restarting from scratch.
                    _cpt_base = _find_cpt_base(cpt_base)
                    _add_cpt_to_sub_script(_sub_script, _cpt_base, _log)
                    if verbose:
                        print('Editing {} to reference the checkpoint '
                              'files {}'.format(_sub_script, _cpt_base))
            if submit:
                if verbose:
                    print('Submitting job...')
                job_info = _submit_script(_sub_script, _log)
                if verbose:
                    print('Job number {} has been submitted.'.format(
                        job_info[2]))
        elif submit:
            print('Job not submitted because no submission script name was '
                  'provided.')
def extend_tprs(base_name, time, working_dir=None, sub_script=None,
                submit=False, extend_infix='-extend', first_extension=True,
                cpt_base='npt', verbose=True, log='extend-tprs.log'):
    """
    Extend a set of tpr files

    :param str base_name: Base of the tpr files. This should return the
        file names when globbed with '*.tpr' appended to this base name.
        Also, this will cause issues when adding the infix if the file name
        doesn't fit the pattern of '{base_name}{number}.tpr'.
    :param time: Amount of time in picoseconds by which to extend the job.
        This will be cast to a string, so an int, string, or float should
        be fine.
    :type time: str or int or float
    :param str working_dir: Default: None. If given, this directory will be
        changed into and work will continue there. If working_dir is None,
        the working dir will be taken to be the directory one directory
        above the location given in base_name.
    :param str sub_script: Default: None. Name of the submission script. If
        given, the script will be edited to match the new name of the
        extended tpr files. sub_script can be given as an absolute path or
        relative to current directory (first priority) or relative to
        working_dir (checked second).
    :param bool submit: Default: False. If true, the job will be submitted
        to the queuing system.
    :param str extend_infix: Default: '-extend'. str to put into the name
        of the extended tpr files after the base_name and before the
        '[number].tpr'.
    :param bool first_extension: Default: True. If True, '-cpi {checkpoint
        base name}' will be added to the submission script so that it
        becomes a run continuation.
    :param str cpt_base: Default: 'npt'. The first part of the name of the
        checkpoint files that will end in '{number}.cpt'. The full
        checkpoint base_name will be found using
        :func:`~paratemp.para_temp_setup._find_cpt_base`.
    :param bool verbose: Default: True. If True, a lot more status
        information will be printed.
    :param str log: Default: 'extend-tprs.log'. Name of file to which to
        log information on this process and output from GROMACS tools.
    :return: None
    """
    # Split the absolute base name into the directory holding the tpr
    # files and the file-name prefix used for globbing below.
    _tpr_dir, _rel_base_name = os.path.split(os.path.abspath(base_name))
    if working_dir is None:
        # Default working dir: one level above the tpr directory.
        _working_dir = os.path.abspath(_tpr_dir + '/../')
    else:
        _working_dir = working_dir
    if sub_script is not None:
        # Look for the submission script relative to the current directory
        # first, then relative to the working directory.
        second_poss = os.path.abspath(os.path.join(_working_dir, sub_script))
        if os.path.isfile(sub_script):
            _sub_script = os.path.abspath(sub_script)
        elif os.path.isfile(second_poss):
            _sub_script = second_poss
        else:
            raise OSError(
                errno.ENOENT, 'Submit script not found relative to '
                              'here or working_dir.')
    else:
        _sub_script = None  # Only needed so the IDE stops bothering me
    with cd(_working_dir), open(log, 'a') as _log:
        if float(time):
            # Non-zero extension time: extend every matching tpr file.
            _time = str(time)
            # Splits '{base}{number}.tpr' so the infix can go in between.
            re_split_name = re.compile(
                r'({})(\d+\.tpr)'.format(_rel_base_name))
            with cd(_tpr_dir):
                tpr_names = glob.glob(_rel_base_name + '*.tpr')
                if len(tpr_names) < 1:
                    raise InputError(
                        base_name,
                        'no files found for {}'.format(base_name + '*.tpr'))
                if verbose:
                    print('Extending {} tpr files'.format(len(tpr_names)))
                for tpr_name in tpr_names:
                    tpr_groups = re_split_name.match(tpr_name)
                    new_tpr_name = (tpr_groups.group(1) + extend_infix +
                                    tpr_groups.group(2))
                    _extend_tpr(tpr_name, new_tpr_name, _time, _log)
                if verbose:
                    print(' ' * 4 + 'Done extending tpr files.')
            extended = True
        else:
            # float(time) is falsy (zero): nothing to add.
            extended = False
            if verbose:
                print('tpr files not extended (no time to be added)')
        if sub_script is not None:
            _sub_script = os.path.relpath(_sub_script)
            if extended:
                if verbose:
                    print('Editing {} for new tpr names '
                          'with {}'.format(_sub_script, extend_infix))
                # Point the submission script at the renamed (extended) tprs.
                _replace_string_in_file(_rel_base_name + ' ',
                                        _rel_base_name + extend_infix + ' ',
                                        _sub_script, _log)
                if first_extension:
                    # Reference the checkpoint files so the run continues
                    # rather than restarting from scratch.
                    _cpt_base = _find_cpt_base(cpt_base)
                    _add_cpt_to_sub_script(_sub_script, _cpt_base, _log)
                    if verbose:
                        print('Editing {} to reference the checkpoint '
                              'files {}'.format(_sub_script, _cpt_base))
            if submit:
                if verbose:
                    print('Submitting job...')
                job_info = _submit_script(_sub_script, _log)
                if verbose:
                    print('Job number {} has been submitted.'.format(
                        job_info[2]))
        elif submit:
            print('Job not submitted because no submission script name was '
                  'provided.')
def cleanup_bad_gromacs_restart(out_base, working_dir='./', list_files=True,
                                replace_files=False, verbose=True):
    """
    Replace "new" files with GROMACS backed-up files after messed-up restart

    No timestamps are accounted for, and this is purely based on the file
    names and the default way GROMACS backs up files it would have
    otherwise replaced.

    :param str out_base: Base name for output files, likely the same as the
        '-deffnm' argument.
    :param str working_dir: Directory in which to look and do these
        replacements
    :param bool list_files: If true, matched and unmatched files will all
        be printed
    :param bool replace_files: If true, the backed-up files will be moved
        to overwrite the "new" files.
    :param bool verbose: If true, more file counts and such will be
        printed.
    :return: None
    """
    with cd(working_dir):
        # "Good" files are the backups: leading '#', trailing '.1#'
        # (see the slice below); "bad" files are the post-restart ones.
        good_files = glob.glob('#' + out_base + '*')
        bad_files = glob.glob(out_base + '*')
        good_files.sort()
        bad_files.sort()
        if verbose:
            print('Found {} "bad" and {} "good" files.'.format(
                len(bad_files), len(good_files)))
        # Map each "bad" name to its backed-up counterpart, if any.
        match_dict = dict()
        unmatched_good = list()
        unmatched_bad = list(bad_files)
        for g_name in good_files:
            poss_bad_name = g_name[1:-3]  # remove # from start and end and .1
            if poss_bad_name in bad_files:
                unmatched_bad.remove(poss_bad_name)
                match_dict[poss_bad_name] = g_name
            else:
                unmatched_good.append(g_name)
        if verbose:
            print('Total of {} matched files.'.format(len(match_dict)))
        if len(unmatched_good) + len(unmatched_bad) != 0 and verbose:
            print('Unmatched file counts:\n '
                  'good:{:>3}\n bad:{:>3}'.format(len(unmatched_good),
                                                  len(unmatched_bad)))
        elif verbose:
            print('No unmatched files.')
        if list_files:
            if len(unmatched_good) != 0:
                print('Unmatched "good" files:')
                for g_name in unmatched_good:
                    print(' {}'.format(g_name))
            else:
                print('No unmatched "good" files')
            if len(unmatched_bad) != 0:
                print('Unmatched "bad" files:')
                for b_name in unmatched_bad:
                    print(' {}'.format(b_name))
            else:
                print('No unmatched "bad" files')
            if len(match_dict) > 0:
                print('Matched files:\n')
                print('-' * 63)
                print('{:^30} | {:^30}'.format('good', 'bad'))
                print('-' * 63)
                for key in sorted(match_dict):
                    print('{:>30} | {:>30}'.format(match_dict[key], key))
                print('-' * 63)
            else:
                print('No matched files!!')
        if replace_files:
            if verbose:
                print('Now replacing "bad" with matched "good" files.')
            # Overwrite each "bad" file with its backed-up original.
            for b_name in match_dict:
                shutil.move(match_dict[b_name], b_name)
            if verbose:
                print('Done replacing files.')
def energies_df(pt_run_dir):
    """Fixture: MultiIndex DataFrame of energies read from the PT run dir."""
    from paratemp.energy_bin_analysis import get_energies
    with cd(pt_run_dir):
        energies = get_energies('PT-out')
    return energies
def cleanup_bad_gromacs_restart(out_base, working_dir='./', list_files=True,
                                replace_files=False, verbose=True):
    """
    Replace "new" files with GROMACS backed-up files after messed-up restart

    No timestamps are accounted for, and this is purely based on the file
    names and the default way GROMACS backs up files it would have
    otherwise replaced.

    :param str out_base: Base name for output files, likely the same as the
        '-deffnm' argument.
    :param str working_dir: Directory in which to look and do these
        replacements
    :param bool list_files: If true, matched and unmatched files will all
        be printed
    :param bool replace_files: If true, the backed-up files will be moved
        to overwrite the "new" files.
    :param bool verbose: If true, more file counts and such will be
        printed.
    :return: None
    """
    with cd(working_dir):
        # "Good" files are the backups: leading '#', trailing '.1#'
        # (see the slice below); "bad" files are the post-restart ones.
        good_files = glob.glob('#'+out_base+'*')
        bad_files = glob.glob(out_base+'*')
        good_files.sort()
        bad_files.sort()
        if verbose:
            print('Found {} "bad" and {} "good" files.'.format(len(
                bad_files), len(good_files)))
        # Map each "bad" name to its backed-up counterpart, if any.
        match_dict = dict()
        unmatched_good = list()
        unmatched_bad = list(bad_files)
        for g_name in good_files:
            poss_bad_name = g_name[1:-3]  # remove # from start and end and .1
            if poss_bad_name in bad_files:
                unmatched_bad.remove(poss_bad_name)
                match_dict[poss_bad_name] = g_name
            else:
                unmatched_good.append(g_name)
        if verbose:
            print('Total of {} matched files.'.format(len(match_dict)))
        if len(unmatched_good) + len(unmatched_bad) != 0 and verbose:
            print('Unmatched file counts:\n '
                  'good:{:>3}\n bad:{:>3}'.format(len(unmatched_good),
                                                  len(unmatched_bad)))
        elif verbose:
            print('No unmatched files.')
        if list_files:
            if len(unmatched_good) != 0:
                print('Unmatched "good" files:')
                for g_name in unmatched_good:
                    print(' {}'.format(g_name))
            else:
                print('No unmatched "good" files')
            if len(unmatched_bad) != 0:
                print('Unmatched "bad" files:')
                for b_name in unmatched_bad:
                    print(' {}'.format(b_name))
            else:
                print('No unmatched "bad" files')
            if len(match_dict) > 0:
                print('Matched files:\n')
                print('-'*63)
                print('{:^30} | {:^30}'.format('good', 'bad'))
                print('-'*63)
                for key in sorted(match_dict):
                    print('{:>30} | {:>30}'.format(match_dict[key], key))
                print('-'*63)
            else:
                print('No matched files!!')
        if replace_files:
            if verbose:
                print('Now replacing "bad" with matched "good" files.')
            # Overwrite each "bad" file with its backed-up original.
            for b_name in match_dict:
                shutil.move(match_dict[b_name], b_name)
            if verbose:
                print('Done replacing files.')