Esempio n. 1
0
def make_model_devi(iter_index, jdata, mdata):
    """Prepare the model-deviation work directory of iteration `iter_index`.

    Every ``graph*pb`` file found in this iteration's training directory is
    symlinked into the work directory.  Then, for each entry of the previous
    iteration's rest-data directory, a ``task.000.NNNNNN`` directory is
    created and that entry is symlinked into it under `rest_data_name`.

    `jdata` and `mdata` are accepted but not read by this function.

    Returns False when the previous iteration has no rest-data directory
    (no task directory is created in that case), True otherwise.
    """
    current_iter = make_iter_name(iter_index)
    work_path = os.path.join(current_iter, model_devi_name)
    create_path(work_path)

    # link the models; the link targets are absolute paths
    graph_dir = os.path.abspath(os.path.join(current_iter, train_name))
    for graph_file in glob.glob(os.path.join(graph_dir, "graph*pb")):
        link_name = os.path.join(work_path, os.path.basename(graph_file))
        os.symlink(graph_file, link_name)

    # link the rest data left by the previous iteration
    previous_iter = make_iter_name(iter_index - 1)
    rest_data_path = os.path.join(previous_iter, model_devi_name,
                                  rest_data_name)
    if not os.path.exists(rest_data_path):
        return False
    # NOTE: task numbering follows os.listdir order
    for task_idx, entry in enumerate(os.listdir(rest_data_path)):
        task_path = os.path.join(work_path, "task.%03d.%06d" % (0, task_idx))
        create_path(task_path)
        link_src = os.path.abspath(os.path.join(rest_data_path, entry))
        link_dst = os.path.abspath(os.path.join(task_path, rest_data_name))
        os.symlink(link_src, link_dst)
    return True
Esempio n. 2
0
def make_fp(iter_index, jdata, mdata):
    """Create the fp task directories of iteration `iter_index`.

    If ``jdata["labeled"]`` is truthy, the picked-data directory is symlinked
    into the work directory twice (as ``task.000`` and as ``data.000``) and
    the function returns.  Otherwise the picked systems are loaded with
    `get_systems` and, for every item obtained by iterating each system, a
    ``task.000.NNNNNN`` directory is created holding a Gaussian ``input``
    file.

    Parameters come from ``jdata['user_fp_params']`` when that key exists and
    from ``jdata['fp_params']`` otherwise.  `mdata` is not read.

    Raises RuntimeError when ``jdata['fp_style']`` is not ``"gaussian"``; the
    first task directory has already been created at that point.
    """
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, fp_name)
    create_path(work_path)
    picked_data_path = os.path.join(iter_name, model_devi_name,
                                    picked_data_name)

    if jdata.get("labeled", False):
        dlog.info("already labeled, skip make_fp and link data directly")
        # the same picked data is exposed under both names
        for link_fmt in ("task.%03d", "data.%03d"):
            link_dst = os.path.abspath(os.path.join(work_path, link_fmt % 0))
            os.symlink(os.path.abspath(picked_data_path), link_dst)
        return

    systems = get_systems(picked_data_path, jdata)
    fp_style = jdata['fp_style']
    if 'user_fp_params' in jdata:
        fp_params = jdata['user_fp_params']
    else:
        fp_params = jdata['fp_params']

    # one running task index across all systems
    every_subsys = (subsys for system in systems for subsys in system)
    for task_idx, subsys in enumerate(every_subsys):
        sys_data = subsys.data
        task_path = os.path.join(work_path, "task.%03d.%06d" % (0, task_idx))
        create_path(task_path)
        if fp_style != "gaussian":
            # TODO: support other formats
            raise RuntimeError("unsupported fp style")
        gaussian_input = make_gaussian_input(sys_data, fp_params)
        with open(os.path.join(task_path, 'input'), 'w') as fp:
            fp.write(gaussian_input)
Esempio n. 3
0
def init_model(iter_index, jdata, mdata):
    """Link the checkpoints of user-supplied initial models into the train tasks.

    Nothing is done unless ``jdata['training_init_model']`` is truthy.

    ``jdata['training_iter0_model_path']`` is a glob pattern or a list of glob
    patterns.  Each pattern is expanded, its matches are sorted, and the
    absolute paths are collected in pattern order.  The number of collected
    paths must equal ``jdata['numb_models']``.  For the ii-th path, the train
    task directory ``train_task_fmt % ii`` is created under this iteration's
    training directory and every ``model.ckpt*`` file found in that path is
    symlinked into it under the same base name.

    `mdata` is not read.

    Raises AssertionError when the number of matched model paths differs from
    ``jdata['numb_models']``.
    """
    if not jdata.get('training_init_model', False):
        return

    patterns = jdata.get('training_iter0_model_path', [])
    # isinstance also accepts str subclasses, which would otherwise be
    # iterated character by character below
    if isinstance(patterns, str):
        patterns = [patterns]
    iter0_models = []
    for pattern in patterns:
        iter0_models.extend(
            os.path.abspath(match) for match in sorted(glob.glob(pattern)))

    numb_models = jdata['numb_models']
    assert (
        numb_models == len(iter0_models)
    ), "training_iter0_model_path should be provided, and the number of models should be equal to %d" % numb_models

    work_path = os.path.join(make_iter_name(iter_index), train_name)
    create_path(work_path)
    for ii, model_dir in enumerate(iter0_models):
        train_path = os.path.join(work_path, train_task_fmt % ii)
        create_path(train_path)
        # model_dir is absolute, so the link targets are absolute as well;
        # creating the link by path avoids changing the process working
        # directory, which would stay changed if a symlink call failed
        for ckpt in glob.glob(os.path.join(model_dir, 'model.ckpt*')):
            os.symlink(ckpt, os.path.join(train_path, os.path.basename(ckpt)))
Esempio n. 4
0
def make_fp_labeled(iter_index, jdata):
    """Link already-labeled picked data into the fp work directory.

    With ``jdata['use_clusters']`` truthy, the whole picked-data directory is
    symlinked (absolute target) as ``task.<fmt % 0>`` and ``data.<fmt % 0>``,
    where ``<fmt>`` is `data_system_fmt`.

    Otherwise every path under the picked-data directory that matches
    `sys_name_pattern` is linked with a relative symlink as
    ``data.<fmt % idx>`` and ``task.<fmt % idx>``, where ``idx`` is the
    integer found in the second dot-separated field of the path's base name.

    Raises KeyError when ``jdata`` has no ``'pick_data'`` key.
    """
    dlog.info("already labeled, skip make_fp and link data directly")
    # Not used below; the lookup is kept so that a missing 'pick_data' key
    # still raises KeyError as it always did.
    pick_data = jdata['pick_data']
    use_clusters = jdata.get('use_clusters', False)
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, fp_name)
    create_path(work_path)
    picked_data_path = os.path.abspath(
        os.path.join(iter_name, model_devi_name, picked_data_name))

    if use_clusters:
        for prefix in ("task.", "data."):
            link_dst = os.path.abspath(
                os.path.join(work_path, prefix + data_system_fmt % 0))
            os.symlink(picked_data_path, link_dst)
        return

    sys_paths = glob.glob(os.path.join(picked_data_path, sys_name_pattern))
    cwd = os.getcwd()
    # The links are relative, so they are computed and created from inside
    # the work directory.
    os.chdir(work_path)
    try:
        for sys_path in sys_paths:
            sys_idx = int(os.path.basename(sys_path).split('.')[1])
            rel_target = os.path.relpath(sys_path)
            os.symlink(rel_target, 'data.' + data_system_fmt % sys_idx)
            os.symlink(rel_target, 'task.' + data_system_fmt % sys_idx)
    finally:
        # always restore the caller's working directory, even on failure
        os.chdir(cwd)
Esempio n. 5
0
def init_pick(iter_index, jdata, mdata):
    """Randomly split the dataset into an initial picked set and a rest set.

    The systems are loaded from ``jdata['pick_data']`` with `get_systems`.
    Every (system key, index) pair is shuffled with ``np.random.shuffle``;
    the first ``jdata['init_pick_number']`` pairs form the picked set and the
    remaining ones the rest set.  Both sets are written with
    ``to_deepmd_raw`` and ``to_deepmd_npy`` under the model-deviation
    directory of iteration `iter_index`.  `mdata` is not read.
    """
    pick_data = jdata['pick_data']
    init_pick_number = jdata['init_pick_number']
    # use MultiSystems with System
    # TODO: support System and LabeledSystem
    # TODO: support other format
    systems = get_systems(pick_data, jdata)

    # one (system key, index) label per element of every system
    labels = [(key, sys_id)
              for key, system in systems.systems.items()
              for sys_id in range(len(system))]

    iter_name = make_iter_name(iter_index)
    create_path(iter_name)
    work_path = os.path.join(iter_name, model_devi_name)
    create_path(work_path)

    # random split of the label positions
    shuffled = np.arange(len(labels))
    np.random.shuffle(shuffled)
    pick_idx, rest_idx = shuffled[:init_pick_number], shuffled[init_pick_number:]

    def _dump(positions, dir_name, set_size):
        # collect the selected elements and write them in both formats
        selected = dpdata.MultiSystems()
        for pos in positions:
            sys_name, sys_id = labels[pos]
            selected.append(systems[sys_name][sys_id])
        out_path = os.path.join(work_path, dir_name)
        selected.to_deepmd_raw(out_path)
        selected.to_deepmd_npy(out_path, set_size=set_size)

    # dump the init data, then the rest data
    _dump(pick_idx, picked_data_name, init_pick_number)
    _dump(rest_idx, rest_data_name, rest_idx.size)
Esempio n. 6
0
def make_fp_configs(iter_index, jdata):
    """Write one fp task directory per picked configuration.

    With ``jdata['use_clusters']`` truthy, the picked data is loaded with
    `get_multi_system`; every item of every system gets a directory
    ``task.<fp_task_fmt % (0, n)>`` (``n`` counts across all systems) and is
    exported there through ``to('vasp/poscar', .../POSCAR)``.

    Otherwise every path matching `sys_name_pattern` under the picked-data
    directory is loaded as a ``deepmd/npy`` `dpdata.System`; each of its items
    gets a directory ``task.<fp_task_fmt % (idx, n)>`` (``idx`` is the integer
    in the second dot-separated field of the path's base name, ``n`` restarts
    at 0 for every system) containing ``POSCAR`` and an empty-object
    ``job.json``.

    Raises KeyError when ``jdata`` has no ``'pick_data'`` key (the value
    itself is not used).
    """
    pick_data = jdata['pick_data']
    use_clusters = jdata.get('use_clusters', False)
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, fp_name)
    create_path(work_path)
    picked_data_path = os.path.join(iter_name, model_devi_name,
                                    picked_data_name)

    if use_clusters:
        systems = get_multi_system(picked_data_path, jdata)
        flattened = (subsys for system in systems for subsys in system)
        for task_idx, subsys in enumerate(flattened):
            task_path = os.path.join(work_path,
                                     "task." + fp_task_fmt % (0, task_idx))
            create_path(task_path)
            subsys.to('vasp/poscar', os.path.join(task_path, 'POSCAR'))
        return

    picked_data_path = os.path.abspath(picked_data_path)
    for sys_dir in glob.glob(os.path.join(picked_data_path, sys_name_pattern)):
        loaded = dpdata.System(sys_dir, fmt='deepmd/npy')
        sys_idx = os.path.basename(sys_dir).split('.')[1]
        for frame_idx, frame in enumerate(loaded):
            task_name = "task." + fp_task_fmt % (int(sys_idx), frame_idx)
            task_path = os.path.join(work_path, task_name)
            create_path(task_path)
            frame.to('vasp/poscar', os.path.join(task_path, 'POSCAR'))
            # an empty job description is written next to the structure
            with open(os.path.join(task_path, 'job.json'), 'w') as fp:
                json.dump({}, fp, indent=4)
Esempio n. 7
0
def init_pick(iter_index, jdata, mdata):
    """Randomly split the dataset into an initial picked set and a rest set.

    The data at ``jdata['pick_data']`` is loaded with `get_multi_system` when
    ``jdata['use_clusters']`` is truthy and with `get_systems` otherwise.
    All (key, index) labels are shuffled with ``np.random.shuffle``; the first
    ``jdata['init_pick_number']`` go to the picked set, the remainder to the
    rest set.  Both sets are written by `_init_dump_selected_frames` under the
    model-deviation directory of iteration `iter_index`.  `mdata` is not read.
    """
    pick_data = jdata['pick_data']
    init_pick_number = jdata['init_pick_number']
    use_clusters = jdata.get('use_clusters', False)
    # use MultiSystems with System
    # TODO: support System and LabeledSystem
    # TODO: support other format
    if use_clusters:
        systems = get_multi_system(pick_data, jdata)
        keyed_systems = systems.systems.items()
    else:
        systems = get_systems(pick_data, jdata)
        keyed_systems = systems.items()

    # one (key, index) label per element of every system
    labels = [(key, pos)
              for key, system in keyed_systems
              for pos in range(len(system))]

    work_path = os.path.join(make_iter_name(iter_index), model_devi_name)
    create_path(work_path)

    # random split of the label positions
    shuffled = np.arange(len(labels))
    np.random.shuffle(shuffled)
    pick_idx, rest_idx = shuffled[:init_pick_number], shuffled[init_pick_number:]

    # dump the init data
    _init_dump_selected_frames(
        systems, labels, pick_idx,
        os.path.join(work_path, picked_data_name), jdata)

    # dump the rest data
    _init_dump_selected_frames(
        systems, labels, rest_idx,
        os.path.join(work_path, rest_data_name), jdata)
Esempio n. 8
0
def make_model_devi(iter_index, jdata, mdata):
    """Prepare the model-deviation work directory of iteration `iter_index`.

    Every ``graph*pb`` file found in this iteration's training directory is
    symlinked into the work directory.  The rest data of the previous
    iteration is then linked into per-task directories:

    * ``jdata['use_clusters']`` truthy: one task
      ``task.<model_devi_task_fmt % (0, n)>`` per directory entry of the rest
      data (in ``os.listdir`` order), linked with absolute paths;
    * otherwise: one task ``task.<model_devi_task_fmt % (idx, 0)>`` per path
      matching `sys_name_pattern`, where ``idx`` is the integer in the second
      dot-separated field of the path's base name, linked with a relative
      symlink.

    `mdata` is not read.

    Returns False when the previous iteration has no rest-data directory,
    True otherwise.  Raises KeyError when ``jdata`` has no ``'pick_data'``
    key.
    """
    # Not used below; the lookup is kept so that a missing 'pick_data' key
    # still raises KeyError as it always did.
    pick_data = jdata['pick_data']
    use_clusters = jdata.get('use_clusters', False)
    iter_name = make_iter_name(iter_index)
    work_path = os.path.join(iter_name, model_devi_name)
    create_path(work_path)

    # link the models; the link targets are absolute paths
    train_path = os.path.abspath(os.path.join(iter_name, train_name))
    for model in glob.glob(os.path.join(train_path, "graph*pb")):
        os.symlink(model, os.path.join(work_path, os.path.basename(model)))

    # link the last rest data
    last_iter_name = make_iter_name(iter_index - 1)
    rest_data_path = os.path.join(last_iter_name, model_devi_name,
                                  rest_data_name)
    if not os.path.exists(rest_data_path):
        return False

    if use_clusters:
        for jj, subsystem in enumerate(os.listdir(rest_data_path)):
            task_name = "task." + model_devi_task_fmt % (0, jj)
            task_path = os.path.join(work_path, task_name)
            create_path(task_path)
            os.symlink(
                os.path.abspath(os.path.join(rest_data_path, subsystem)),
                os.path.abspath(os.path.join(task_path, rest_data_name)))
        return True

    rest_data_path = os.path.abspath(rest_data_path)
    cwd = os.getcwd()
    for sys_path in glob.glob(os.path.join(rest_data_path, sys_name_pattern)):
        sys_idx = int(os.path.basename(sys_path).split('.')[1])
        task_name = "task." + model_devi_task_fmt % (sys_idx, 0)
        task_path = os.path.join(work_path, task_name)
        create_path(task_path)
        # The link is relative, so it is computed and created from inside the
        # task directory.
        os.chdir(task_path)
        try:
            os.symlink(os.path.relpath(sys_path), rest_data_name)
        finally:
            # always restore the caller's working directory, even on failure
            os.chdir(cwd)
    return True