def make_model_devi(iter_index, jdata, mdata):
    """Prepare the model-deviation work directory of one iteration.

    Every ``graph*pb`` file found in this iteration's training directory is
    symlinked into the work directory.  Then, for each entry listed in the
    previous iteration's rest-data directory, a ``task.000.NNNNNN`` directory
    is created and the entry is symlinked into it under ``rest_data_name``.

    ``jdata`` and ``mdata`` are accepted but not read here.

    Returns ``False`` when the previous iteration has no rest-data directory
    (no task directories are created in that case), ``True`` otherwise.
    """
    current_iter = make_iter_name(iter_index)
    work_path = os.path.join(current_iter, model_devi_name)
    create_path(work_path)

    # link the model
    train_dir = os.path.abspath(os.path.join(current_iter, train_name))
    for graph_file in glob.glob(os.path.join(train_dir, "graph*pb")):
        os.symlink(graph_file,
                   os.path.join(work_path, os.path.basename(graph_file)))

    # link the last rest data
    previous_iter = make_iter_name(iter_index - 1)
    rest_data_path = os.path.join(previous_iter, model_devi_name,
                                  rest_data_name)
    if os.path.exists(rest_data_path):
        for task_idx, entry in enumerate(os.listdir(rest_data_path)):
            task_path = os.path.join(work_path,
                                     "task.%03d.%06d" % (0, task_idx))
            create_path(task_path)
            # both ends are absolute so the link does not depend on the cwd
            link_source = os.path.abspath(os.path.join(rest_data_path, entry))
            link_name = os.path.abspath(
                os.path.join(task_path, rest_data_name))
            os.symlink(link_source, link_name)
        return True
    return False
def make_fp(iter_index, jdata, mdata):
    """Create the first-principles task directories for one iteration.

    When ``jdata["labeled"]`` is truthy the picked data is simply symlinked
    into the work directory as ``task.000`` and ``data.000`` and nothing else
    is generated.

    Otherwise the picked data is loaded with ``get_systems`` and, for every
    frame, a ``task.000.NNNNNN`` directory is created containing an ``input``
    file produced by ``make_gaussian_input``.  ``jdata['user_fp_params']``
    takes precedence over ``jdata['fp_params']`` when present.

    ``mdata`` is accepted but not read here.

    Raises:
        RuntimeError: if ``jdata['fp_style']`` is not ``"gaussian"`` (raised
            when the first frame is processed, after its task directory has
            been created).
    """
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, fp_name)
    create_path(work_path)
    picked_data_path = os.path.join(iter_name, model_devi_name,
                                    picked_data_name)

    if jdata.get("labeled", False):
        dlog.info("already labeled, skip make_fp and link data directly")
        for link_name in ("task.%03d" % 0, "data.%03d" % 0):
            os.symlink(os.path.abspath(picked_data_path),
                       os.path.abspath(os.path.join(work_path, link_name)))
        return

    systems = get_systems(picked_data_path, jdata)
    fp_style = jdata['fp_style']
    fp_params = (jdata['user_fp_params'] if 'user_fp_params' in jdata
                 else jdata['fp_params'])

    # one running task counter across all systems
    all_frames = (frame for system in systems for frame in system)
    for task_idx, frame in enumerate(all_frames):
        frame_data = frame.data
        task_path = os.path.join(work_path, "task.%03d.%06d" % (0, task_idx))
        create_path(task_path)
        if fp_style != "gaussian":
            # TODO: support other formats
            raise RuntimeError("unsupported fp style")
        gaussian_input = make_gaussian_input(frame_data, fp_params)
        with open(os.path.join(task_path, 'input'), 'w') as fp:
            fp.write(gaussian_input)
def init_model(iter_index, jdata, mdata):
    """Link pre-trained checkpoint files into each training task directory.

    Does nothing unless ``jdata['training_init_model']`` is truthy.

    ``jdata['training_iter0_model_path']`` is a glob pattern or a list of glob
    patterns.  Matches are sorted per pattern, made absolute and concatenated;
    their count must equal ``jdata['numb_models']``.  For the i-th match a
    training task directory (``train_task_fmt % i`` under this iteration's
    training directory) is created and every ``model.ckpt*`` file found in
    the match is symlinked into it.

    ``mdata`` is accepted but not read here.

    Raises:
        AssertionError: if the number of matched model directories differs
            from ``jdata['numb_models']``.
        KeyError: if ``jdata['numb_models']`` is missing.
    """
    if not jdata.get('training_init_model', False):
        return

    patterns = jdata.get('training_iter0_model_path', [])
    # a single pattern may be given as a bare string
    if isinstance(patterns, str):
        patterns = [patterns]

    iter0_models = []
    for pattern in patterns:
        # sort within each pattern so the model order is deterministic
        iter0_models.extend(
            os.path.abspath(match) for match in sorted(glob.glob(pattern)))

    numb_models = jdata['numb_models']
    # explicit raise (not ``assert``) so the check survives ``python -O``;
    # the exception type is kept for callers that catch AssertionError
    if numb_models != len(iter0_models):
        raise AssertionError(
            "training_iter0_model_path should be provided, and the number of models should be equal to %d" % numb_models)

    work_path = os.path.join(make_iter_name(iter_index), train_name)
    create_path(work_path)
    for ii, model_dir in enumerate(iter0_models):
        train_path = os.path.join(work_path, train_task_fmt % ii)
        create_path(train_path)
        # link sources are absolute, so the links can be created from the
        # current directory; no chdir means nothing to restore on failure
        for ckpt_file in glob.glob(os.path.join(model_dir, 'model.ckpt*')):
            os.symlink(ckpt_file,
                       os.path.join(train_path, os.path.basename(ckpt_file)))
def make_fp_labeled(iter_index, jdata):
    """Link already-labeled picked data into the fp work directory.

    With ``jdata['use_clusters']`` truthy, the whole picked-data directory is
    symlinked (absolute link) as ``task.<data_system_fmt % 0>`` and
    ``data.<data_system_fmt % 0>``.

    Otherwise every path matching ``sys_name_pattern`` inside the picked-data
    directory is symlinked, relative to the work directory, as both
    ``data.<idx>`` and ``task.<idx>``, where ``idx`` is the integer found in
    the second dot-separated field of the matched base name.

    Raises:
        KeyError: if ``jdata['pick_data']`` is missing.
    """
    dlog.info("already labeled, skip make_fp and link data directly")
    # not used below; the lookup is kept so a missing key still fails here
    pick_data = jdata['pick_data']
    use_clusters = jdata.get('use_clusters', False)
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, fp_name)
    create_path(work_path)
    picked_data_path = os.path.abspath(
        os.path.join(iter_name, model_devi_name, picked_data_name))

    if use_clusters:
        for prefix in ("task.", "data."):
            os.symlink(
                picked_data_path,
                os.path.abspath(
                    os.path.join(work_path, prefix + data_system_fmt % 0)))
        return

    sys_paths = glob.glob(os.path.join(picked_data_path, sys_name_pattern))
    cwd = os.getcwd()
    # work from inside work_path so os.path.relpath yields links relative
    # to the directory that holds them
    os.chdir(work_path)
    try:
        for sys_path in sys_paths:
            sys_idx = int(os.path.basename(sys_path).split('.')[1])
            link_source = os.path.relpath(sys_path)
            os.symlink(link_source, 'data.' + data_system_fmt % sys_idx)
            os.symlink(link_source, 'task.' + data_system_fmt % sys_idx)
    finally:
        # always restore the caller's working directory, even if linking
        # fails part-way through
        os.chdir(cwd)
def init_pick(iter_index, jdata, mdata):
    """Randomly split the dataset into initial picked data and rest data.

    The systems returned by ``get_systems(jdata['pick_data'], jdata)`` are
    enumerated frame by frame, the frame indices are shuffled with
    ``np.random.shuffle``, and the first ``jdata['init_pick_number']`` of
    them are written under ``picked_data_name`` while the remainder goes
    under ``rest_data_name``; both are written inside this iteration's
    model-deviation directory in deepmd raw and npy form.

    ``mdata`` is accepted but not read here.
    """
    pick_data = jdata['pick_data']
    init_pick_number = jdata['init_pick_number']

    # use MultiSystems with System
    # TODO: support System and LabeledSystem
    # TODO: support other format
    systems = get_systems(pick_data, jdata)

    # label the system: one (system key, frame index) pair per frame
    labels = [
        (key, frame_id)
        for key, system in systems.systems.items()
        for frame_id in range(len(system))
    ]

    iter_name = make_iter_name(iter_index)
    create_path(iter_name)
    work_path = os.path.join(iter_name, model_devi_name)
    create_path(work_path)

    # random pick
    idx = np.arange(len(labels))
    np.random.shuffle(idx)
    pick_idx, rest_idx = idx[:init_pick_number], idx[init_pick_number:]

    def dump_frames(frame_indices, dir_name, set_size):
        # collect the selected frames and write them under work_path/dir_name
        selected = dpdata.MultiSystems()
        for label_pos in frame_indices:
            sys_name, sys_id = labels[label_pos]
            selected.append(systems[sys_name][sys_id])
        out_path = os.path.join(work_path, dir_name)
        selected.to_deepmd_raw(out_path)
        selected.to_deepmd_npy(out_path, set_size=set_size)

    # dump the init data
    dump_frames(pick_idx, picked_data_name, init_pick_number)
    # dump the rest data
    dump_frames(rest_idx, rest_data_name, rest_idx.size)
def make_fp_configs(iter_index, jdata):
    """Write one POSCAR task directory per picked frame.

    With ``jdata['use_clusters']`` truthy the picked data is loaded through
    ``get_multi_system`` and every frame gets a task directory named
    ``"task." + fp_task_fmt % (0, n)`` with a single counter ``n`` running
    across all systems.

    Otherwise each path matching ``sys_name_pattern`` in the picked-data
    directory is loaded as a ``deepmd/npy`` ``dpdata.System``; its frames get
    task directories named ``"task." + fp_task_fmt % (sys_idx, n)`` where
    ``sys_idx`` comes from the second dot-separated field of the base name
    and ``n`` restarts at 0 for every system.  In this branch an empty
    ``job.json`` (``{}``) is written next to each ``POSCAR``.

    Raises:
        KeyError: if ``jdata['pick_data']`` is missing.
    """
    pick_data = jdata['pick_data']
    use_clusters = jdata.get('use_clusters', False)
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, fp_name)
    create_path(work_path)
    picked_data_path = os.path.join(iter_name, model_devi_name,
                                    picked_data_name)

    if use_clusters:
        systems = get_multi_system(picked_data_path, jdata)
        # flatten lazily so the task counter keeps running across systems
        all_frames = (frame for system in systems for frame in system)
        for frame_idx, frame in enumerate(all_frames):
            task_path = os.path.join(
                work_path, "task." + fp_task_fmt % (0, frame_idx))
            create_path(task_path)
            frame.to('vasp/poscar', os.path.join(task_path, 'POSCAR'))
        return

    picked_data_path = os.path.abspath(picked_data_path)
    for sys_dir in glob.glob(os.path.join(picked_data_path,
                                          sys_name_pattern)):
        loaded = dpdata.System(sys_dir, fmt='deepmd/npy')
        sys_idx = os.path.basename(sys_dir).split('.')[1]
        for frame_idx, frame in enumerate(loaded):
            task_path = os.path.join(
                work_path, "task." + fp_task_fmt % (int(sys_idx), frame_idx))
            create_path(task_path)
            frame.to('vasp/poscar', os.path.join(task_path, 'POSCAR'))
            # an empty job description is written alongside each structure
            job = {}
            with open(os.path.join(task_path, 'job.json'), 'w') as fp:
                json.dump(job, fp, indent=4)
def init_pick(iter_index, jdata, mdata):
    """Randomly split the dataset into initial picked data and rest data.

    Depending on ``jdata['use_clusters']`` the data in ``jdata['pick_data']``
    is loaded with ``get_multi_system`` or ``get_systems``.  All frames are
    enumerated as ``(system key, frame index)`` pairs, their positions are
    shuffled with ``np.random.shuffle``, and the first
    ``jdata['init_pick_number']`` positions are handed to
    ``_init_dump_selected_frames`` for the ``picked_data_name`` directory,
    the remainder for the ``rest_data_name`` directory (both inside this
    iteration's model-deviation directory).

    ``mdata`` is accepted but not read here.
    """
    pick_data = jdata['pick_data']
    init_pick_number = jdata['init_pick_number']
    use_clusters = jdata.get('use_clusters', False)

    # use MultiSystems with System
    # TODO: support System and LabeledSystem
    # TODO: support other format
    if use_clusters:
        systems = get_multi_system(pick_data, jdata)
        keyed_systems = systems.systems.items()
    else:
        systems = get_systems(pick_data, jdata)
        keyed_systems = systems.items()

    # label the system: one (key, frame index) pair per frame
    labels = [
        (key, frame_id)
        for key, system in keyed_systems
        for frame_id in range(len(system))
    ]

    work_path = os.path.join(make_iter_name(iter_index), model_devi_name)
    create_path(work_path)

    # random pick
    idx = np.arange(len(labels))
    np.random.shuffle(idx)
    pick_idx, rest_idx = idx[:init_pick_number], idx[init_pick_number:]

    # dump the init data
    _init_dump_selected_frames(
        systems, labels, pick_idx,
        os.path.join(work_path, picked_data_name), jdata)
    # dump the rest data
    _init_dump_selected_frames(
        systems, labels, rest_idx,
        os.path.join(work_path, rest_data_name), jdata)
def make_model_devi(iter_index, jdata, mdata):
    """Prepare the model-deviation work directory of one iteration.

    Every ``graph*pb`` file found in this iteration's training directory is
    symlinked into the work directory.  The previous iteration's rest data is
    then linked into task directories:

    * ``jdata['use_clusters']`` truthy: one task directory
      ``"task." + model_devi_task_fmt % (0, n)`` per entry listed in the
      rest-data directory, each holding an absolute symlink to that entry.
    * otherwise: one task directory
      ``"task." + model_devi_task_fmt % (sys_idx, 0)`` per path matching
      ``sys_name_pattern``, each holding a relative symlink to that path;
      ``sys_idx`` is the integer in the second dot-separated field of the
      matched base name.

    ``mdata`` is accepted but not read here.

    Returns:
        ``False`` when the previous iteration has no rest-data directory
        (no task directories are created), ``True`` otherwise.

    Raises:
        KeyError: if ``jdata['pick_data']`` is missing.
    """
    # not used below; the lookup is kept so a missing key still fails here
    pick_data = jdata['pick_data']
    use_clusters = jdata.get('use_clusters', False)
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, model_devi_name)
    create_path(work_path)

    # link the model
    train_path = os.path.abspath(os.path.join(iter_name, train_name))
    for model in glob.glob(os.path.join(train_path, "graph*pb")):
        os.symlink(model, os.path.join(work_path, os.path.basename(model)))

    # link the last rest data
    last_iter_name = make_iter_name(iter_index - 1)
    rest_data_path = os.path.join(last_iter_name, model_devi_name,
                                  rest_data_name)
    if not os.path.exists(rest_data_path):
        return False

    if use_clusters:
        for jj, subsystem in enumerate(os.listdir(rest_data_path)):
            task_path = os.path.join(
                work_path, "task." + model_devi_task_fmt % (0, jj))
            create_path(task_path)
            os.symlink(
                os.path.abspath(os.path.join(rest_data_path, subsystem)),
                os.path.abspath(os.path.join(task_path, rest_data_name)))
        return True

    rest_data_path = os.path.abspath(rest_data_path)
    cwd = os.getcwd()
    for sys_path in glob.glob(os.path.join(rest_data_path,
                                           sys_name_pattern)):
        sys_idx = int(os.path.basename(sys_path).split('.')[1])
        task_path = os.path.join(
            work_path, "task." + model_devi_task_fmt % (sys_idx, 0))
        create_path(task_path)
        # link from inside the task directory so os.path.relpath produces a
        # target relative to where the link lives
        os.chdir(task_path)
        try:
            os.symlink(os.path.relpath(sys_path), rest_data_name)
        finally:
            # always restore the caller's working directory, even if the
            # link cannot be created
            os.chdir(cwd)
    return True