def make_scf(systems_train, systems_test=None, *,
             train_dump="data_train", test_dump="data_test", no_model=False,
             workdir='00.scf', share_folder='share',
             source_arg="scf_input.yaml", source_model="model.pth",
             dispatcher=None, resources=None,
             sub_size=1, group_size=1, ingroup_parallel=1, sub_res=None,
             python='python', cleanup=False, **task_args):
    """Assemble the SCF step of the workflow as a single `Sequence`.

    The returned sequence consists of, in order:

    1. the task built by `make_run_scf` (log file ``log.scf``),
    2. the task built by `make_stat_scf` (log file ``log.data``),
    3. optionally, when `cleanup` is truthy, the task built by
       `make_cleanup` for ``slurm-*.out``, ``task.*/err`` and ``fin.record``.

    Every sub-task is created with ``workdir="."``; only the enclosing
    sequence receives the `workdir` given here.  Both the run task and the
    stat task are created with ``group_data=False``.

    Parameters
    ----------
    systems_train, systems_test
        Forwarded to both `make_run_scf` and `make_stat_scf`.
    train_dump, test_dump
        Forwarded to both `make_run_scf` and `make_stat_scf`.
    no_model, share_folder, source_arg, source_model, dispatcher, resources,
    sub_size, group_size, ingroup_parallel, sub_res, python
        Forwarded unchanged to `make_run_scf`.
    workdir
        Working directory handed to the returned `Sequence`.
    cleanup
        When truthy, a cleanup task is appended to the sequence.
    **task_args
        Extra keyword arguments forwarded to `make_run_scf`.

    Returns
    -------
    Sequence
        The run / stat (/ cleanup) tasks wrapped with `workdir`.
    """
    steps = [
        # actual SCF calculation
        make_run_scf(
            systems_train, systems_test,
            train_dump=train_dump, test_dump=test_dump,
            no_model=no_model, group_data=False,
            workdir=".", outlog="log.scf",
            share_folder=share_folder,
            source_arg=source_arg, source_model=source_model,
            dispatcher=dispatcher, resources=resources,
            group_size=group_size, ingroup_parallel=ingroup_parallel,
            sub_size=sub_size, sub_res=sub_res,
            python=python,
            **task_args
        ),
        # post-processing on the dumped results
        make_stat_scf(
            systems_train=systems_train, systems_test=systems_test,
            train_dump=train_dump, test_dump=test_dump,
            workdir=".", outlog="log.data",
            group_data=False
        ),
    ]
    if cleanup:
        leftovers = ["slurm-*.out", "task.*/err", "fin.record"]
        steps.append(make_cleanup(leftovers, workdir="."))
    return Sequence(steps, workdir=workdir)
def make_train(source_train="data_train", source_test="data_test", *,
               restart=True, source_model="model.pth", save_model="model.pth",
               source_arg="train_input.yaml", workdir="01.train",
               share_folder="share", dispatcher=None, resources=None,
               python="python", cleanup=False, **task_args):
    """Assemble the training step of the workflow as a single `Sequence`.

    The returned sequence consists of, in order:

    1. the task built by `make_run_train` (log file ``log.train``),
    2. the task built by `make_test_train`, which is given `save_model` as
       its model file, the fixed data globs ``data_train/*`` and
       ``data_test/*``, output prefix ``test`` and log file ``log.test``,
    3. optionally, when `cleanup` is truthy, the task built by
       `make_cleanup` for ``slurm-*.out``, ``err``, ``fin.record`` and
       ``tag_*finished``.

    Every sub-task is created with ``workdir="."``; only the enclosing
    sequence receives the `workdir` given here.

    Parameters
    ----------
    source_train, source_test, restart, source_model, source_arg,
    share_folder, dispatcher, resources, python
        Forwarded unchanged to `make_run_train`.
    save_model
        Forwarded to `make_run_train` and used as ``model_file`` of the
        test task.
    workdir
        Working directory handed to the returned `Sequence`.
    cleanup
        When truthy, a cleanup task is appended to the sequence.
    **task_args
        Extra keyword arguments forwarded to `make_run_train`.

    Returns
    -------
    Sequence
        The train / test (/ cleanup) tasks wrapped with `workdir`.
    """
    steps = [
        # the training job itself
        make_run_train(
            source_train=source_train, source_test=source_test,
            restart=restart,
            source_model=source_model, save_model=save_model,
            source_arg=source_arg,
            workdir=".", share_folder=share_folder,
            outlog="log.train",
            dispatcher=dispatcher, resources=resources,
            python=python,
            **task_args
        ),
        # evaluate the freshly saved model
        make_test_train(
            data_paths=["data_train/*", "data_test/*"],
            model_file=save_model,
            output_prefix="test", group_results=True,
            workdir=".", outlog="log.test"
        ),
    ]
    if cleanup:
        leftovers = ["slurm-*.out", "err", "fin.record", "tag_*finished"]
        steps.append(make_cleanup(leftovers, workdir="."))
    return Sequence(steps, workdir=workdir)
call_kwargs=scf_input, outlog='log.scf', workdir='00.scf', link_prev_files=['model.pth'], share_folder='share', link_share_files=['mol_files.raw']) task_data = PythonTask(collect_data_grouped, call_args=[train_idx], outlog='log.data', workdir='01.data', link_prev_files=['model.pth', "results"], share_folder='share', link_share_files=['e_ref.npy']) task_train = PythonTask(train_main, call_args=["old_model.pth"], call_kwargs=train_input, outlog='log.train', workdir='02.train', link_prev_files=[('model.pth', 'old_model.pth'), 'train_paths.raw', 'test_paths.raw']) seq = Sequence([task_scf, task_data, task_train]) iterate = Iteration(seq, niter, init_folder='share/init', record_file='RECORD') if os.path.exists('RECORD'): iterate.restart() else: iterate.run()
def make_iterate(systems_train=None, systems_test=None, n_iter=0,
                 *, proj_basis=None, workdir=".", share_folder="share",
                 scf_input=True, scf_machine=None,
                 train_input=True, train_machine=None,
                 init_model=False, init_scf=True, init_train=True,
                 init_scf_machine=None, init_train_machine=None,
                 cleanup=False, strict=True):
    r"""
    Make a `Workflow` to do the iterative training procedure.

    The procedure will be conducted in `workdir` for `n_iter` iterations.
    Each iteration of the procedure is done in sub-folder ``iter.XX``,
    which further contains two sub-folders, ``00.scf`` and ``01.train``.

    The `Workflow` is only created but not executed.

    Parameters
    ----------
    systems_train: str or list of str, optional
        System paths used as training set in the procedure.
        These paths can refer to systems or a file that contains
        multiple system paths. Systems must be .xyz files or folders
        containing .npy files. If not given, use
        ``$share_folder/systems_train.raw`` as default.
    systems_test: str or list of str, optional
        System paths used as testing (or validation) set in the procedure.
        The format is same as `systems_train`. If not given, use the last
        system in the training set as testing system.
    n_iter: int, optional
        The number of iterations to do. Default is 0.
    proj_basis: str, optional
        The basis set used to project the density matrix onto.
        Can be a `.npz` file specifying the coefficients in pyscf's format.
        If not given, use the default basis.
    workdir: str, optional
        The working directory. Default is current directory (`.`).
    share_folder: str, optional
        The folder to store shared files in the iteration, including
        ``scf_input.yaml``, ``train_input.yaml``, and possibly files for
        initialization. Default is ``share``.
    scf_input: bool or str or dict, optional
        Arguments used to specify the SCF calculation. If given `None` or
        `False`, bypass the checking and use program default (unreliable).
        Otherwise, the arguments would be saved as a YAML file at
        ``$share_folder/scf_input.yaml`` and used for SCF calculation.
        Default is `True`, which will check and use the existing file.
        If given a string of file path, copy the corresponding file into
        target location. If given a dict, dump it into the target file.
    scf_machine: str or dict, optional
        Arguments used to specify the job settings of SCF calculation,
        including submitting method, resources, group size, etc.
        If given a string of file path, load that file as a dict using
        YAML format. If not given, use the program default setup.
    train_input: bool or str or dict, optional
        Arguments used to specify the training of neural network.
        It follows the same rule as `scf_input`, only that the target
        location is ``$share_folder/train_input.yaml``.
    train_machine: str or dict, optional
        Arguments used to specify the job settings of NN training.
        It follows the same rule as `scf_machine`, but without group.
    init_model: bool or str, optional
        Decide whether to use an existing model as the starting point.
        If set to `False` (default), use `init_scf` and `init_train`
        to run an extra initialization iteration in folder ``iter.init``.
        If set to `True`, look for a model at
        ``$share_folder/init/model.pth``. If given a string of path,
        copy that file into target location.
    init_scf: bool or str or dict, optional
        Similar to `scf_input` but used for init calculation. The target
        location is ``$share_folder/init_scf.yaml``. Defaults to True.
    init_scf_machine: str or dict, optional
        If specified, use different machine settings for init scf jobs.
        If not given, the settings from `scf_machine` are reused.
    init_train: bool or str or dict, optional
        Similar to `train_input` but used for init calculation. The target
        location is ``$share_folder/init_train.yaml``. Defaults to True.
    init_train_machine: str or dict, optional
        If specified, use different machine settings for init training job.
        If not given, the settings from `train_machine` are reused.
    cleanup: bool, optional
        Whether to remove job files during calculation,
        such as ``slurm-*.out`` and ``err``. Defaults to False.
    strict: bool, optional
        Forwarded to the machine-argument check; controls whether
        additional arguments may be passed to the task constructors
        through the ``*_machine`` settings. Defaults to True.

    Returns
    -------
    iterate: Iteration (subclass of Workflow)
        An instance of workflow that can be executed by `iterate.run()`.

    Raises
    ------
    FileNotFoundError
        Raise an Error when the system or argument files are required but
        not found in the share folder.
    """
    # check share folder contains required data
    # and collect the systems into share folder
    if systems_train is None:  # load default training systems
        default_train = os.path.join(share_folder, DEFAULT_TRAIN)
        assert_exist(default_train)  # must have training systems.
        systems_train = default_train
    systems_train = collect_systems(systems_train,
                                    os.path.join(share_folder, SYS_TRAIN))
    # check test systems
    if systems_test is None:  # try to load default testing systems
        default_test = os.path.join(share_folder, DEFAULT_TEST)
        if os.path.exists(default_test):  # if exists then use it
            systems_test = default_test
        else:  # if empty use last one of training system
            systems_test = systems_train[-1]
    systems_test = collect_systems(systems_test,
                                   os.path.join(share_folder, SYS_TEST))
    # check share folder contains required yaml file
    scf_args_name = check_share_folder(scf_input, SCF_ARGS_NAME, share_folder)
    train_args_name = check_share_folder(train_input, TRN_ARGS_NAME,
                                         share_folder)
    # check required machine parameters
    scf_machine = check_arg_dict(scf_machine, DEFAULT_SCF_MACHINE, strict)
    train_machine = check_arg_dict(train_machine, DEFAULT_TRN_MACHINE, strict)
    # handle projection basis
    if proj_basis is not None:
        save_basis(os.path.join(share_folder, PROJ_BASIS),
                   load_basis(proj_basis))
        # from here on tasks refer to the copy stored in the share folder
        proj_basis = PROJ_BASIS
    # make tasks
    scf_step = make_scf(
        systems_train=systems_train, systems_test=systems_test,
        train_dump=DATA_TRAIN, test_dump=DATA_TEST, no_model=False,
        workdir=SCF_STEP_DIR, share_folder=share_folder,
        source_arg=scf_args_name, source_model=MODEL_FILE,
        source_pbasis=proj_basis, cleanup=cleanup, **scf_machine
    )
    train_step = make_train(
        source_train=DATA_TRAIN, source_test=DATA_TEST,
        restart=True, source_model=MODEL_FILE, save_model=MODEL_FILE,
        source_pbasis=proj_basis, source_arg=train_args_name,
        workdir=TRN_STEP_DIR, share_folder=share_folder,
        cleanup=cleanup, **train_machine
    )
    per_iter = Sequence([scf_step, train_step])
    # NOTE(review): the iteration is created with workdir="." while only the
    # record file is placed under `workdir` -- kept as is, confirm intended.
    iterate = Iteration(per_iter, n_iter,
                        workdir=".",
                        record_file=os.path.join(workdir, RECORD))
    # make init
    if init_model:
        # if set true or give str, check share/init/model.pth
        init_folder = os.path.join(share_folder, "init")
        check_share_folder(init_model, MODEL_FILE, init_folder)
        iterate.set_init_folder(init_folder)
    elif init_scf or init_train:
        # otherwise, make an init iteration to train the first model
        init_scf_name = check_share_folder(init_scf, INIT_SCF_NAME,
                                           share_folder)
        init_train_name = check_share_folder(init_train, INIT_TRN_NAME,
                                             share_folder)
        # init-specific machine settings fall back to the regular ones
        if init_scf_machine is not None:
            init_scf_machine = check_arg_dict(
                init_scf_machine, DEFAULT_SCF_MACHINE, strict)
        else:
            init_scf_machine = scf_machine
        if init_train_machine is not None:
            # validate against the *training* defaults, same as train_machine
            init_train_machine = check_arg_dict(
                init_train_machine, DEFAULT_TRN_MACHINE, strict)
        else:
            init_train_machine = train_machine
        scf_init = make_scf(
            systems_train=systems_train, systems_test=systems_test,
            train_dump=DATA_TRAIN, test_dump=DATA_TEST, no_model=True,
            workdir=SCF_STEP_DIR, share_folder=share_folder,
            source_arg=init_scf_name, source_model=None,
            source_pbasis=proj_basis, cleanup=cleanup,
            # actually apply the init-specific settings resolved above
            **init_scf_machine
        )
        train_init = make_train(
            source_train=DATA_TRAIN, source_test=DATA_TEST,
            restart=False, source_model=MODEL_FILE, save_model=MODEL_FILE,
            source_pbasis=proj_basis, source_arg=init_train_name,
            workdir=TRN_STEP_DIR, share_folder=share_folder,
            cleanup=cleanup,
            **init_train_machine
        )
        init_iter = Sequence([scf_init, train_init], workdir="iter.init")
        iterate.prepend(init_iter)
    return iterate
workdir=f'task.{i:02}', share_folder="share", link_share_files=["input.yaml"], link_prev_files=['train_paths.raw', 'test_paths.raw']) for i in range(nmodel) ] run_train = GroupBatchTask(batch_train, resources=train_res, outlog="log.train", link_prev_files=[('model.pth', 'old_model.pth')]) post_train = ShellTask("ln -s task.00/model.pth .") clean_train = ShellTask("rm slurm-*.out") train_flow = Sequence([run_train, post_train, clean_train], workdir='00.train') # Define SCF ngroup = 12 mol_files = np.loadtxt('share/mol_files.raw', dtype=str) group_files = [mol_files[i::ngroup] for i in range(ngroup)] envs = {"PYSCF_MAX_MEMORY": 32000} scf_res = { "cpus_per_task": 5, "time_limit": "6:00:00", "mem_limit": 32, "envs": envs }