Example 1
import os

from deepks.task.task import PythonTask
from deepks.utils import load_sys_paths  # assumed module path for this helper


def make_stat_scf(systems_train,
                  systems_test=None,
                  *,
                  train_dump="data_train",
                  test_dump="data_test",
                  group_data=False,
                  workdir='.',
                  outlog="log.data",
                  **stat_args):
    # follow same convention for systems as run_scf
    systems_train = [os.path.abspath(s) for s in load_sys_paths(systems_train)]
    systems_test = [os.path.abspath(s) for s in load_sys_paths(systems_test)]
    if not systems_test:
        systems_test.append(systems_train[-1])
        # if len(systems_train) > 1:
        #     del systems_train[-1]
    # load stats function
    from deepks.scf.stats import print_stats
    stat_args.update(systems=systems_train,
                     test_sys=systems_test,
                     dump_dir=train_dump,
                     test_dump=test_dump,
                     group=group_data)
    # make task
    return PythonTask(print_stats,
                      call_kwargs=stat_args,
                      outlog=outlog,
                      errlog="err",
                      workdir=workdir)
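
A minimal usage sketch for the factory above; the system directories, dump folders and workdir below are hypothetical placeholders, and any extra keyword arguments are forwarded to print_stats via stat_args:

# Hypothetical usage of make_stat_scf; all paths are placeholders.
stat_task = make_stat_scf(
    systems_train=["systems/train"],
    systems_test=["systems/test"],
    train_dump="data_train",
    test_dump="data_test",
    workdir="02.stat",
)
# stat_task is a PythonTask, so it can be chained with other steps in a
# Sequence, as the later examples show.
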
Example 2
from deepks.task.task import PythonTask


def make_test_train(data_paths,
                    model_file="model.pth",
                    *,
                    output_prefix="test",
                    group_results=True,
                    workdir='.',
                    outlog="log.test",
                    **test_args):
    from deepks.model.test import main as test_func
    test_args.update(data_paths=data_paths,
                     model_file=model_file,
                     output_prefix=output_prefix,
                     group=group_results)
    # make task
    return PythonTask(test_func,
                      call_kwargs=test_args,
                      outlog=outlog,
                      errlog="err",
                      workdir=workdir)
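
A similar hedged sketch for make_test_train; data_paths, the model file and the workdir are illustrative only:

# Hypothetical usage of make_test_train; paths are placeholders.
test_task = make_test_train(
    data_paths=["data_train", "data_test"],
    model_file="model.pth",
    workdir="03.test",
)
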
Example 3
import numpy as np

from deepks.utils import load_yaml  # assumed module path for the YAML loader
from deepks.task.task import PythonTask
from deepks.task.workflow import Sequence, Iteration

# scf_main, train_main and collect_data_grouped are assumed to be imported
# from the corresponding deepks SCF, training and data-collection modules;
# this fragment omits those imports.

niter = 5
nmol = 1500
ntrain = 1000
ntest = 500

train_input = load_yaml('share/train_input.yaml')
scf_input = load_yaml('share/scf_input.yaml')
train_idx = np.arange(ntrain)

task_scf = PythonTask(scf_main,
                      call_kwargs=scf_input,
                      outlog='log.scf',
                      workdir='00.scf',
                      link_prev_files=['model.pth'],
                      share_folder='share',
                      link_share_files=['mol_files.raw'])

task_data = PythonTask(collect_data_grouped,
                       call_args=[train_idx],
                       outlog='log.data',
                       workdir='01.data',
                       link_prev_files=['model.pth', "results"],
                       share_folder='share',
                       link_share_files=['e_ref.npy'])

task_train = PythonTask(train_main,
                        call_args=["old_model.pth"],
                        call_kwargs=train_input,
Example 4
                         dispatcher=disp,
                         resources=scf_res,
                         outlog="log.scf",
                         link_prev_files=['model.pth'],
                         forward_files=['model.pth'],
                         backward_files=['results/*'])

all_idx = np.loadtxt('share/index.raw', dtype=int)
train_idx = all_idx[:ntrain]
test_idx = all_idx[ntrain:]

post_scf = PythonTask(collect_data,
                      call_args=[train_idx, test_idx],
                      call_kwargs={
                          "sys_dir": "results",
                          "ene_ref": "e_ref.npy"
                      },
                      outlog='log.data',
                      share_folder='share',
                      link_share_files=['e_ref.npy'])

clean_scf = ShellTask("rm slurm-*.out")

scf_flow = Sequence([run_scf, post_scf, clean_scf], workdir='01.scf')

# Group them together
per_iter = Sequence([train_flow, scf_flow])
iterate = Iteration(per_iter,
                    niter,
                    init_folder='share/init',
                    record_file='RECORD')
Example 5
    shutil.copy('results/system.raw', 'train')
    shutil.copy('results/system.raw', 'test')
    Path('train_paths.raw').write_text(str(Path('train').absolute()))
    Path('test_paths.raw').write_text(str(Path('test').absolute()))


niter = 5
nmol = 1500
ntrain = 1000
ntest = 500

train_input = load_yaml('share/train_input.yaml')
scf_input = load_yaml('share/scf_input.yaml')

task_train = PythonTask(train_main, call_kwargs=train_input,
                        outlog='log.train',
                        workdir='00.train',
                        link_prev_files=['train_paths.raw', 'test_paths.raw'])

task_scf = PythonTask(scf_main, call_kwargs=scf_input,
                      outlog='log.scf',
                      workdir='01.scf',
                      link_prev_files=['model.pth'],
                      share_folder='share', link_share_files=['mol_files.raw'])

task_data = PythonTask(collect_data, call_args=[nmol, ntrain],
                       outlog='log.data',
                       workdir='02.data',
                       link_prev_files=['results'],
                       share_folder='share', link_share_files=['e_ref.npy'])

seq = Sequence([task_train, task_scf, task_data])
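
The sequence above covers a single pass; following the Iteration pattern used in the other examples, it can be repeated niter times and resumed from a RECORD file. A minimal sketch, assuming seq and niter as defined above:

import os

from deepks.task.workflow import Iteration

# Repeat the per-iteration sequence and resume from RECORD if interrupted
# (same pattern as the Iteration usage in the other examples).
iterate = Iteration(seq, niter,
                    init_folder='share/init',
                    record_file='RECORD')

if os.path.exists('RECORD'):
    iterate.restart()
else:
    iterate.run()
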
Example 6
        for i in range(nsys)
    ],
    workdir='00.scf',
    outlog='log.scf',
    resources=scf_res,
    link_prev_files=['model.pth'])

# labeling

task_data = PythonTask(lambda: [
    collect_data_grouped(train_idx=train_idx,
                         append=True,
                         ene_ref=f"e_ref.npy",
                         force_ref=f"f_ref.npy",
                         sys_dir=f"results") for i in range(nsys)
],
                       outlog='log.data',
                       workdir='01.data',
                       link_prev_files=['model.pth'] +
                       [f"results" for i in range(nsys)],
                       share_folder='share',
                       link_share_files=[f'e_ref.npy' for i in range(nsys)] +
                       [f'f_ref.npy' for i in range(nsys)])

# training

train_cmd = " ".join([
    "python -u /path/to/source/deepks/train/main.py", "train_input.yaml",
    "--restart old_model.pth"
])

train_res = {"time_limit": "24:00:00", "mem_limit": 32, "numb_gpu": 1}
Example 7
    old_trn = np.loadtxt("train_paths.raw", dtype=str)
    old_tst = np.loadtxt("test_paths.raw", dtype=str)
    trn_res = np.stack(
        [np.loadtxt(f"{m}/test/train.all.out")[:, 1] for m in paths], -1)
    tst_res = np.stack(
        [np.loadtxt(f"{m}/test/test.all.out")[:, 1] for m in paths], -1)

    tst_std = np.std(tst_res, axis=-1)   # per-system spread over the result sets in `paths`
    # take the nsel systems with the largest spread for training;
    # the remaining indices (restored to ascending order) stay in the test set
    order = np.argsort(tst_std)[::-1]
    sel = order[:nsel]
    rst = np.sort(order[nsel:])

    new_trn = np.concatenate([old_trn, old_tst[sel]])
    new_tst = old_tst[rst]
    np.savetxt("new_train_paths.raw", new_trn, fmt="%s")
    np.savetxt("new_test_paths.raw", new_tst, fmt="%s")


task_select = PythonTask(select_data, call_args=[nsel])

# combine them together
iterate = Iteration([task_train, task_test, task_select],
                    niter,
                    init_folder='share/init',
                    record_file='RECORD')

if os.path.exists('RECORD'):
    iterate.restart()
else:
    iterate.run()
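
The selection step in select_data scores each test system by the standard deviation of its results across the result sets in paths, a query-by-committee style criterion, and promotes the most uncertain systems into the training set. A toy numerical illustration with made-up values:

import numpy as np

# Toy illustration of the selection rule in select_data (made-up numbers).
tst_std = np.array([0.1, 0.8, 0.3, 0.5])   # per-system spread over the result sets
nsel = 2
order = np.argsort(tst_std)[::-1]          # systems sorted by decreasing spread
sel = order[:nsel]                          # array([1, 3]) -> moved into training
rst = np.sort(order[nsel:])                 # array([0, 2]) -> remain in the test set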