def make_stat_scf(systems_train, systems_test=None, *, train_dump="data_train", test_dump="data_test", group_data=False, workdir='.', outlog="log.data", **stat_args):
    """Build a PythonTask that prints data statistics for train/test systems.

    Follows the same system-path convention as ``run_scf``: paths are
    resolved to absolute form, and when no test systems are given the last
    training system is reused as the test set.  Extra keyword arguments are
    forwarded to ``deepks.scf.stats.print_stats``.
    """
    trn_paths = [os.path.abspath(p) for p in load_sys_paths(systems_train)]
    tst_paths = [os.path.abspath(p) for p in load_sys_paths(systems_test)]
    if not tst_paths:
        # fall back: the last training system doubles as the test system
        tst_paths.append(trn_paths[-1])
    # deferred import: the stats module is only needed when the task runs
    from deepks.scf.stats import print_stats
    call_kwargs = {
        **stat_args,
        "systems": trn_paths,
        "test_sys": tst_paths,
        "dump_dir": train_dump,
        "test_dump": test_dump,
        "group": group_data,
    }
    return PythonTask(print_stats, call_kwargs=call_kwargs,
                      outlog=outlog, errlog="err", workdir=workdir)
def collect_systems(systems, folder=None):
    """Give every system a unique basename, optionally symlinking into *folder*.

    Duplicate basenames are disambiguated by repeatedly prepending the next
    parent-directory name, joined with ".".  If *folder* is None the list of
    unique basenames is returned; otherwise each system is symlinked into
    *folder* and the list of link targets is returned.

    Fix: the original ``map(list, zip(*[...]))`` unpacking raised
    ``ValueError`` for an empty system list; the explicit split loop below
    returns ``[]`` cleanly instead.
    """
    sys_list = [os.path.abspath(s) for s in load_sys_paths(systems)]
    # split into (parent, basename); trailing path separators are ignored
    parents = []
    bases = []
    for path in sys_list:
        head, tail = os.path.split(path.rstrip(os.path.sep))
        parents.append(head)
        bases.append(tail)
    dups = range(len(sys_list))
    while True:
        # group the currently-suspicious indices by their basename
        count_dict = {}
        for i in dups:
            count_dict.setdefault(bases[i], []).append(i)
        dup_dict = {k: v for k, v in count_dict.items() if len(v) > 1}
        if not dup_dict:
            break
        dups = [i for idxs in dup_dict.values() for i in idxs]
        if all(parents[i] in ("/", "") for i in dups):
            # nothing left to pull in: the full paths themselves collide
            print("System list have duplicated terms, index:", dups, file=sys.stderr)
            break
        for di in dups:
            if parents[di] in ("/", ""):
                continue
            # pull one more directory level into the basename, e.g. "a/b" -> "a.b"
            newp, newb = os.path.split(parents[di])
            parents[di] = newp
            bases[di] = f"{newb}.{bases[di]}"
    if folder is None:
        return bases
    targets = [os.path.join(folder, b) for b in bases]
    for src, dst in zip(sys_list, targets):
        link_file(src, dst, use_abs=True)
    return targets
def make_scf_task(*, workdir=".", arg_file="scf_input.yaml", source_arg=None, model_file="model.pth", source_model=None, systems="systems.raw", link_systems=True, dump_dir="results", share_folder="share", outlog="log.scf", group_data=None, dispatcher=None, resources=None, python="python", **task_args):
    """Build a single BatchTask that runs the SCF command over given systems.

    Assembles the command line from the optional pieces (arg file, model,
    systems, dump dir, grouping flag) and collects the matching link /
    forward / backward file lists.  Remaining ``task_args`` are forwarded
    to ``BatchTask``.
    """
    cmd_parts = [SCF_CMD.format(python=python)]
    share_links = task_args.pop("link_share_files", [])
    prev_links = task_args.pop("link_prev_files", [])
    abs_links = task_args.pop("link_abs_files", [])
    fwd_files = task_args.pop("forward_files", [])
    bwd_files = task_args.pop("backward_files", [])
    sys_name = None
    if arg_file:
        cmd_parts.append(arg_file)
        if source_arg is not None:
            share_links.append((source_arg, arg_file))
        fwd_files.append(arg_file)
    if model_file:
        cmd_parts.append(f"-m {model_file}")
        # "NONE" is a sentinel meaning no model file exists to link/forward
        if model_file.upper() != "NONE":
            if source_model is not None:
                prev_links.append((source_model, model_file))
            fwd_files.append(model_file)
    if systems:
        sys_paths = [os.path.abspath(p) for p in load_sys_paths(systems)]
        sys_base = [get_sys_name(p) for p in sys_paths]
        sys_name = [os.path.basename(b) for b in sys_base]
        if link_systems:
            # symlink every file belonging to each system into a local dir
            target_dir = "systems"
            for src in (fl for base in sys_base for fl in glob(f"{base}*")):
                abs_links.append((src, os.path.join(target_dir, os.path.basename(src))))
            fwd_files.append(target_dir)
            sys_str = os.path.join(target_dir, "*")
        else:
            # cannot forward files here; pass absolute paths on the command line
            sys_str = " ".join(sys_paths)
        cmd_parts.append(f"-s {sys_str}")
    if dump_dir:
        cmd_parts.append(f"-d {dump_dir}")
        if sys_name:
            bwd_files.extend(os.path.join(dump_dir, nm) for nm in sys_name)
        else:
            # backward whole folder, may cause problem
            bwd_files.append(dump_dir)
    if group_data is not None:
        cmd_parts.append("-G" if group_data else "-NG")
    command = " ".join(cmd_parts)
    return BatchTask(command, workdir=workdir, dispatcher=dispatcher,
                     resources=resources, outlog=outlog, share_folder=share_folder,
                     link_share_files=share_links, link_prev_files=prev_links,
                     link_abs_files=abs_links, forward_files=fwd_files,
                     backward_files=bwd_files, **task_args)
def make_run_scf(systems_train, systems_test=None, *, train_dump="data_train", test_dump="data_test", no_model=False, group_data=None, workdir='.', share_folder='share', outlog="log.scf", source_arg="scf_input.yaml", source_model="model.pth", dispatcher=None, resources=None, sub_size=1, group_size=1, ingroup_parallel=1, sub_res=None, python='python', **task_args):
    """Assemble a GroupBatchTask of SCF subtasks over train/test systems.

    Systems are resolved to absolute paths; when no test systems are given,
    the last training system is reused as the test set.  Each subtask covers
    at most ``sub_size`` systems (round-robin split across tasks).
    """
    systems_train = [os.path.abspath(p) for p in load_sys_paths(systems_train)]
    systems_test = [os.path.abspath(p) for p in load_sys_paths(systems_test)]
    if not systems_test:
        # fall back: the last training system doubles as the test system
        systems_test.append(systems_train[-1])
    check_system_names(systems_train)
    check_system_names(systems_test)
    # number of subtasks = ceil(n_systems / sub_size); round-robin partition
    ntask_trn = int(np.ceil(len(systems_train) / sub_size))
    ntask_tst = int(np.ceil(len(systems_test) / sub_size))
    train_sets = [systems_train[i::ntask_trn] for i in range(ntask_trn)]
    test_sets = [systems_test[i::ntask_tst] for i in range(ntask_tst)]
    model_file = "NONE" if no_model else "../model.pth"
    nd = max(len(str(ntask_trn + ntask_tst)), 2)  # zero-pad width for task dirs
    sub_res = {**DEFAULT_SCF_SUB_RES, **(sub_res or {})}

    def _subtask(tag, idx, sset, dump):
        # one SCF task over a slice of systems, dumping into the shared folder
        return make_scf_task(systems=sset, workdir=f"task.{tag}.{idx:0{nd}}",
                             arg_file="../scf_input.yaml", source_arg=None,
                             model_file=model_file, source_model=None,
                             dump_dir=f"../{dump}", group_data=group_data,
                             link_systems=True, resources=sub_res, python=python)

    trn_tasks = [_subtask("trn", i, s, train_dump) for i, s in enumerate(train_sets)]
    tst_tasks = [_subtask("tst", i, s, test_dump) for i, s in enumerate(test_sets)]
    link_share = task_args.pop("link_share_files", [])
    link_share.append((source_arg, "scf_input.yaml"))
    link_prev = task_args.pop("link_prev_files", [])
    if not no_model:
        link_prev.append((source_model, "model.pth"))
    resources = {**DEFAULT_SCF_RES, "numb_node": ingroup_parallel, **(resources or {})}
    # NOTE(review): any remaining **task_args are not forwarded to
    # GroupBatchTask (same as the original) — confirm this is intentional.
    return GroupBatchTask(trn_tasks + tst_tasks, group_size=group_size,
                          ingroup_parallel=ingroup_parallel, dispatcher=dispatcher,
                          resources=resources, outlog=outlog, errlog="err",
                          share_folder=share_folder, link_share_files=link_share,
                          link_prev_files=link_prev)
def main(systems, model_file="model.pth", basis='ccpvdz', proj_basis=None, penalty_terms=None, device=None, dump_dir=".", dump_fields=DEFAULT_FNAMES, group=False, mol_args=None, scf_args=None, verbose=0):
    """Run (optionally model-corrected) SCF over systems and dump results.

    For each system, iterate over its frames, build the molecule, solve the
    SCF problem (with the correction model unless ``model_file`` is
    None/"NONE"), and dump the selected fields.  With ``group=False`` each
    system is dumped into its own subfolder of *dump_dir*; with
    ``group=True`` all results sharing the same meta are dumped together
    into *dump_dir*.

    Fix: the stderr message 'meta does not match! saving previous results
    only.' was a broken (line-split) string literal; restored as one literal.
    """
    if model_file is None or model_file.upper() == "NONE":
        model = None
        default_scf_args = DEFAULT_HF_ARGS
    else:
        model = CorrNet.load(model_file).double()
        default_scf_args = DEFAULT_SCF_ARGS
    # check arguments
    penalty_terms = check_list(penalty_terms)
    if mol_args is None:
        mol_args = {}
    if scf_args is None:
        scf_args = {}
    scf_args = {**default_scf_args, **scf_args}
    fields = select_fields(dump_fields)
    # label names required by the dump fields and the penalty terms
    label_names = get_required_labels(fields["scf"] + fields["grad"], penalty_terms)

    if verbose:
        print(f"starting calculation with OMP threads: {lib.num_threads()}",
              f"and max memory: {lib.param.MAX_MEMORY}")
        if verbose > 1:
            print(f"basis: {basis}")
            print(f"specified scf args:\n  {scf_args}")

    meta = old_meta = None
    res_list = []
    systems = load_sys_paths(systems)

    for fl in systems:
        fl = fl.rstrip(os.path.sep)
        for atom, attrs, labels in system_iter(fl, label_names):
            mol_input = {**mol_args, "verbose": verbose,
                         "atom": atom, "basis": basis, **attrs}
            mol = build_mol(**mol_input)
            penalties = [build_penalty(pd, labels) for pd in penalty_terms]
            try:
                meta, result = solve_mol(mol, model, fields, labels,
                                         proj_basis=proj_basis, penalties=penalties,
                                         device=device, verbose=verbose, **scf_args)
            except Exception as e:
                print(fl, 'failed! error:', e, file=sys.stderr)
                # continue
                raise
            # in group mode a meta mismatch ends this system's collection early
            if group and old_meta is not None and np.any(meta != old_meta):
                break
            res_list.append(result)
        if not group:
            # dump each system into its own subfolder, then reset the buffer
            sub_dir = os.path.join(dump_dir, get_sys_name(os.path.basename(fl)))
            dump_meta(sub_dir, meta)
            dump_data(sub_dir, **collect_fields(fields, meta, res_list))
            res_list = []
        elif old_meta is not None and np.any(meta != old_meta):
            print(fl, 'meta does not match! saving previous results only.', file=sys.stderr)
            break
        old_meta = meta
        if verbose:
            print(fl, 'finished')

    if group:
        dump_meta(dump_dir, meta)
        dump_data(dump_dir, **collect_fields(fields, meta, res_list))
        if verbose:
            print('group finished')