Python LabeledSystemの例、dpdata.LabeledSystem Pythonの例

コード例 #1

0

ファイルを表示

    def testEntry(self):
        """Build one Entry per directory in ``self.iter_path`` and compare
        the first one against ``self.ref_entries[0]``.

        Each directory supplies the VASP inputs, an ``OUTCAR`` and a
        ``job.json``; the entries are numbered ``pku-0``, ``pku-1``, ...
        """
        built = []
        for index, folder in enumerate(self.iter_path):
            vasp_input = VaspInput.from_directory(folder)
            labeled = LabeledSystem(os.path.join(folder, 'OUTCAR'))
            job_attrib = loadfn(os.path.join(folder, 'job.json'))
            composition = vasp_input['POSCAR'].structure.composition
            built.append(
                Entry(composition,
                      'vasp',
                      vasp_input.as_dict(),
                      labeled.as_dict(),
                      entry_id='pku-' + str(index),
                      attribute=job_attrib))

        # Same number of entries as the reference set.
        self.assertEqual(len(built), len(self.ref_entries))

        first_built = built[0]
        first_ref = self.ref_entries[0]

        # INCAR and KPOINTS round-trip through their dict representation.
        self.assertEqual(Incar.from_dict(first_built.inputs['INCAR']),
                         Incar.from_dict(first_ref.inputs['INCAR']))
        self.assertEqual(
            str(first_ref.inputs['KPOINTS']),
            str(Kpoints.from_dict(first_built.inputs['KPOINTS'])))

        # POTCAR is compared as a dict, POSCAR via its structure.
        self.assertEqual(first_built.inputs['POTCAR'],
                         first_ref.inputs['POTCAR'].as_dict())
        rebuilt_structure = Poscar.from_dict(
            first_built.inputs['POSCAR']).structure
        self.assertEqual(rebuilt_structure,
                         first_ref.inputs['POSCAR'].structure)
        self.assertEqual(first_built.entry_id, 'pku-0')

コード例 #2

0

ファイルを表示

ファイル: VASP.py プロジェクト: picodase/dpgen

    def compute(self, output_dir):
        """Load ``OUTCAR`` from ``output_dir`` and attach the stress tensors.

        Returns ``None`` (after logging a warning) when the file does not
        exist. Otherwise returns ``LabeledSystem(outcar).as_dict()`` with an
        extra ``data['stress']`` entry: one symmetric 3x3 nested list per
        line of the file that contains ``'in kB'``, wrapped in a dict that
        describes a float64 numpy array.
        """
        outcar = os.path.join(output_dir, 'OUTCAR')
        if not os.path.isfile(outcar):
            dlog.warning("cannot find OUTCAR in " + output_dir + " skip")
            return None

        ls = LabeledSystem(outcar)
        with open(outcar, 'r') as fin:
            lines = fin.read().split('\n')

        stress = []
        for line in lines:
            if 'in kB' not in line:
                continue
            fields = line.split()
            # Columns 2..7 of the line are xx, yy, zz, xy, yz, zx.
            xx, yy, zz, xy, yz, zx = [float(fields[k]) for k in range(2, 8)]
            stress.append([
                [xx, xy, zx],
                [xy, yy, yz],
                [zx, yz, zz],
            ])

        outcar_dict = ls.as_dict()
        outcar_dict['data']['stress'] = {
            "@module": "numpy",
            "@class": "array",
            "dtype": "float64",
            "data": stress
        }
        return outcar_dict

コード例 #3

0

ファイルを表示

ファイル: remove_deepmd.py プロジェクト: neojie/mldp

def build_deepmd(path,nsw):
    """Convert ``<path>/outcar`` to deepmd/npy data under ``<path>/deepmd``.

    ``nsw`` is the number of frames; it determines the set size: a quarter
    of the frames when there are more than four, otherwise one frame per
    set (and a message is printed).
    """
    labeled = LabeledSystem(os.path.join(path, 'outcar'),fmt='outcar')
    target = os.path.join(path,'deepmd')
    if nsw > 4:
        # Roughly 25% of the frames per set; the last set may be much
        # smaller (e.g. 82 frames -> 20, 20, 20, 20, 2).
        set_size = nsw//4
    elif nsw <= 4:
        # Not expected in practice: runs normally have > 100 steps.
        set_size = 1
        print("{0} has only {1}".format(path,nsw))
    labeled.to_deepmd_npy(target,set_size=set_size)

コード例 #4

0

ファイルを表示

ファイル: bs_deepmd_fparam.py プロジェクト: neojie/mldp

def build_deepmd(path,nsw):
    """Convert ``<path>/OUTCAR`` to deepmd/npy data under ``<path>/deepmd``.

    The set size is half of ``nsw`` for up to 2000 frames and 1000 beyond
    that. For more than 3000 frames ``check_sets`` is run on the output
    directory afterwards.
    """
    labeled = LabeledSystem(os.path.join(path, 'OUTCAR'),fmt='outcar')
    target = os.path.join(path,'deepmd')
    if nsw > 2000:
        set_size = 1000
    elif nsw <= 2000:
        # Runs are expected to have more than 100 steps.
        set_size = nsw//2
    labeled.to_deepmd_npy(target,set_size=set_size)
    if nsw > 3000:
        check_sets(target)

コード例 #5

0

ファイルを表示

ファイル: entry.py プロジェクト: haidi-ustc/dpdb

 def load(cls, filename, Cls=None):
     """Create an instance from a JSON file.

     The file must contain the keys ``composition``, ``calculator``,
     ``inputs``, ``data``, ``attribute``, ``entry_id`` and ``tag``.

     Parameters
     ----------
     filename : path of the JSON file to read.
     Cls : decoder class with a ``from_dict`` method, used for the
         ``inputs`` section when the calculator is not ``'vasp'``.

     Raises
     ------
     RuntimeError
         If the calculator is not ``'vasp'`` and no ``Cls`` is given.
     """
     with open(filename, 'r') as f:
         jc = json.load(f)
     composition = jc['composition']
     calculator = jc['calculator']
     if calculator.lower() == 'vasp':
         try:
             inputs = VaspInput.from_dict(jc['inputs']).as_dict()
         except Exception:
             # Best effort: keep the raw dict when it cannot be decoded.
             # ``Exception`` (rather than a bare ``except``) lets
             # KeyboardInterrupt and SystemExit propagate.
             inputs = jc['inputs']
             warnings.warn("""Inproperly configure of POTCAR !
                             Returned instance cannot be used 
                             as input for from_dict() method """)
     else:
         if Cls:
             inputs = Cls.from_dict(jc['inputs']).as_dict()
         else:
             raise RuntimeError("inputs decoder must be given")
     # Round-trip through LabeledSystem to normalise the data section.
     data = LabeledSystem.from_dict(jc['data']).as_dict()
     attribute = jc['attribute']
     entry_id = jc['entry_id']
     tag = jc['tag']
     return cls(composition, calculator, inputs, data, entry_id, attribute,
                tag)

コード例 #6

0

ファイルを表示

ファイル: list.py プロジェクト: njzjz/dpdata

 def to_system(self, data, **kwargs):
     """
     Split a data dict into a list of single-frame systems.

     A ``LabeledSystem`` is built when ``data`` contains ``'forces'``,
     otherwise a plain ``System``. An empty system yields ``[]`` and a
     one-frame system is returned as-is inside a list. ``kwargs`` is
     accepted but not used.
     """
     from dpdata import System, LabeledSystem
     system_cls = LabeledSystem if 'forces' in data else System
     system = system_cls(data=data)
     if len(system) == 0:
         return []
     if len(system) == 1:
         return [system]
     return [system.sub_system([frame]) for frame in range(len(system))]

コード例 #7

0

ファイルを表示

ファイル: entry.py プロジェクト: haidi-ustc/dpdb

def test():
    """Smoke test: build an Entry from the current directory, dump it to
    ``pku-1.json`` and load it back, printing both versions."""
    from monty.serialization import dumpfn, loadfn
    from monty.json import MontyDecoder, MontyEncoder
    from pymatgen.io.vasp.inputs import PotcarSingle, Potcar
    vasp_input = VaspInput.from_directory('.')
    labeled = LabeledSystem('OUTCAR', fmt='vasp/outcar')
    inputs_dict = vasp_input.as_dict()
    data_dict = labeled.as_dict()
    original = Entry('Al',
                     'vasp',
                     inputs=inputs_dict,
                     data=data_dict,
                     entry_id='pku-1')
    print(original)

    # Serialise to disk, then read the same file back.
    json_path = 'pku-1.json'
    dumpfn(original.as_dict(), json_path, indent=4)
    reloaded = Entry.load(json_path)
    print(reloaded)
    print(reloaded.as_dict())

コード例 #8

0

ファイルを表示

def _parsing_vasp(paths, id_prefix, iters=True):
    entries = []
    icount = 0
    for path in paths:
        f_outcar = os.path.join(path, 'OUTCAR')
        f_job = os.path.join(path, 'job.json')

        try:
            vi = VaspInput.from_directory(path)
            if os.path.isfile(f_job):
                attrib = loadfn(f_job)
            else:
                attrib = {}

            if iters and attrib:
                tmp_ = path.split('/')[-1]
                iter_info = tmp_.split('.')[1]
                task_info = tmp_.split('.')[-1]
                attrib['iter_info'] = iter_info
                attrib['task_info'] = task_info
            else:
                pass
            comp = vi['POSCAR'].structure.composition
            ls = LabeledSystem(f_outcar)
            lss = ls.to_list()
            for ls in lss:
                if id_prefix:
                    eid = id_prefix + "_" + str(icount)
                else:
                    eid = str(uuid4())
                entry = Entry(comp,
                              'vasp',
                              vi.as_dict(),
                              ls.as_dict(),
                              attribute=attrib,
                              entry_id=eid)
                entries.append(entry)
                icount += 1
        except:
            dlog.info("failed here : %s" % path)
    return entries

コード例 #9

0

ファイルを表示

ファイル: model_dev_funcs.py プロジェクト: neojie/mldp

def extract_outcar(outcar):
    """
    Extract energies, virials and forces from an OUTCAR.

    The second line of the file is inspected: when it contains
    ``nsw_sel`` the integers after ``=`` are taken as the selected step
    numbers.

    Returns ``(etot, stress, forces, nsw)`` where ``stress`` is
    ``ls['virials']`` and ``nsw`` is the selected step numbers minus one
    (zero-based).

    NOTE(review): when the second line has no ``nsw_sel`` marker the raw
    line is passed to ``np.array(...).astype(int)``, which presumably
    fails; confirm whether plain VASP OUTCARs are expected here.
    """
    ls = LabeledSystem(outcar, fmt='outcar')
    # Only the second line is needed; the context manager closes the file.
    with open(outcar) as fp:
        fp.readline()
        nsw_sel = fp.readline()
    if 'nsw_sel' in nsw_sel:
        print('file generated by merge_out.py')
        tmp = nsw_sel.split('=')[1].strip().split(' ')
        nsw_sel = [int(tmp_idx) for tmp_idx in tmp]
    etot = ls['energies']
    nsw = np.array(nsw_sel).astype(int) - 1  # relative nsw, starting from 0
    stress = ls['virials']
    forces = ls['forces']
    return etot, stress, forces, nsw

コード例 #10

0

ファイルを表示

def build_deepmd(path, nsw, outcar, deepmd):
    """Write the selected frames of ``outcar`` as deepmd/npy data.

    The selection comes from the module-level ``args``:

    * ``args.idx``: file with zero-based frame indices.
    * ``args.vaspidx``: file with VASP step numbers. If the second line of
      ``outcar`` contains ``nsw_sel`` the numbers are mapped through that
      list, otherwise they are shifted by one. Overrides ``args.idx``.

    The selected frames go to ``<path>/<deepmd>``. When ``args.test`` is
    set, the remaining frames are copied in as ``set.001``. The set size is
    ``args.batchsize`` if given, else 1 for ``nsw <= 4`` and the first
    value returned by ``best_size(nsw)`` otherwise.

    NOTE(review): if neither ``args.idx`` nor ``args.vaspidx`` is set,
    ``idx`` is never assigned and the function fails.
    """
    ls = LabeledSystem(outcar, fmt='outcar')
    if args.idx:
        print("index file provided")
        idx = np.loadtxt(args.idx).astype(int)

    if args.vaspidx:
        print("vasp index file provided")
        vaspidx = np.loadtxt(args.vaspidx)
        # Only the second line is needed; the context manager closes the file.
        with open(outcar) as fp:
            fp.readline()
            nsw_sel = fp.readline()
        if 'nsw_sel' in nsw_sel:
            print('file generated by merge_out.py')
            tmp = nsw_sel.split('=')[1].strip().split(' ')
            nsw_sel = [int(tmp_idx) for tmp_idx in tmp]
            # Positions in the merged file whose step number was requested.
            idx = [i for i, step in enumerate(nsw_sel) if step in vaspidx]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    # Everything that was not selected becomes the test split.
    idx2 = [i for i in range(len(ls)) if i not in idx]
    ls2 = ls.sub_system(idx2)
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)
    if args.batchsize:
        set_size = args.batchsize
    else:
        if nsw <= 4:  # not expected: runs normally have > 100 steps
            set_size = 1
            print("{0} has only {1}".format(path, nsw))
        if nsw > 4:
            set_size, _ = best_size(nsw)
    ls.to_deepmd_npy(deepmd, set_size=set_size)
    if args.test:
        # Write the test frames to a scratch folder as a single set, then
        # move that set next to the training set.
        ls2.to_deepmd_npy('test_tmp', set_size=100000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')

コード例 #11

0

ファイルを表示

ファイル: multi_sys_0d_filter.py プロジェクト: haidi-ustc/scripts

from glob import glob
from tqdm import tqdm

# Collect OUTCARs from several iterations into one MultiSystems object,
# skipping files for which check_cluster() is true, unreadable files, and
# systems whose first frame has a force component above `maxf`.
fs = glob('iter.0000[4-7]*/02.fp/task*/OUTCAR')
maxf = 3.0
ms = MultiSystems()
ic = 0
vacuum_size = 13
for f in tqdm(fs):
    if check_cluster(f.replace('OUTCAR', 'POSCAR'), vacuum_size, fmt='POSCAR'):
        print(f)
        continue
    try:
        ls = LabeledSystem(f)
    except Exception:
        # Report the unreadable file and move on; catching Exception
        # (not a bare except) keeps Ctrl-C able to stop the loop.
        print(f)
        continue
    if len(ls) > 0:
        if not (ls.sub_system([0]).data['forces'].max() > maxf):
            ic += 1
            ms.append(ls)

print(len(fs))
print(ic)
ms.to_deepmd_raw('deepmd-f%s' % maxf)
ms.to_deepmd_npy('deepmd-f%s' % maxf)

コード例 #12

0

ファイルを表示

ファイル: extract_deepmd.py プロジェクト: neojie/mldp

def build_deepmd_frames(path, outcar, deepmd):
    """Filter the frames of ``outcar``, split them into train/test and write
    deepmd/npy data to ``<path>/<deepmd>``.

    Behaviour is driven by the module-level ``args``:

    * ``args.format``: dpdata format; a plain ``System`` is used when the
      file cannot be loaded as a ``LabeledSystem``.
    * ``args.exclude``: frame indices to drop.
    * ``args.force_limit``: keep only frames whose force components all lie
      between the smallest and largest given value.
    * ``args.idx``: file with training frame indices.
    * ``args.vaspidx``: file with VASP step numbers (used when ``args.idx``
      is not given). If the second line of ``outcar`` contains ``nsw_sel``
      they are mapped through that list, otherwise shifted by one.
    * ``args.train_test_ratio``: used for a random split when neither
      index file is given.
    * ``args.savetest``: copy the test frames in as ``set.001``.
    """
    try:
        ls = LabeledSystem(outcar, fmt=args.format)
    except Exception:
        # Fall back to an unlabeled system; Exception (not a bare except)
        # lets KeyboardInterrupt/SystemExit propagate.
        ls = System(outcar, fmt=args.format)

    if args.exclude:
        oldsize = len(ls)
        idx_new = [i for i in range(len(ls)) if i not in args.exclude]
        ls = ls.sub_system(idx_new)
        newsize = len(ls)
        print('{0}/{1} is selected'.format(newsize, oldsize))

    if args.force_limit:
        fmin = min(args.force_limit)
        fmax = max(args.force_limit)
        print("force limit imposed, force in between {0}, {1}".format(
            fmin, fmax))
        idx_new = []
        exclude = []
        for i in range(len(ls)):
            forces = ls[i].data['forces']
            if forces.min() >= fmin and forces.max() <= fmax:
                idx_new.append(i)
            else:
                exclude.append(i)
        print('excluded frames', exclude)
        print('{0} / {1} is selected'.format(len(idx_new), len(ls)))
        ls = ls.sub_system(idx_new)

    if args.idx:
        print("index file provided")
        idx = np.loadtxt(args.idx).astype(int)
    elif args.vaspidx:
        print("vasp index file provided")
        vaspidx = np.loadtxt(args.vaspidx)
        # Only the second line is needed; the context manager closes the file.
        with open(outcar) as fp:
            fp.readline()
            nsw_sel = fp.readline()
        if 'nsw_sel' in nsw_sel:
            print('file generated by merge_out.py')
            tmp = nsw_sel.split('=')[1].strip().split(' ')
            nsw_sel = [int(tmp_idx) for tmp_idx in tmp]
            # Positions in the merged file whose step number was requested.
            idx = [i for i, step in enumerate(nsw_sel) if step in vaspidx]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    else:
        print("split train and test by ratio {0} : {1}".format(
            args.train_test_ratio, 1))
        train_size = round(
            len(ls) * (args.train_test_ratio) / (args.train_test_ratio + 1))
        idx = np.random.choice(range(len(ls)), train_size, replace=False)
        idx.sort()

    idx2 = [i for i in range(len(ls)) if i not in idx]  # test
    ls2 = ls.sub_system(idx2)  # test
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)

    # A set size far above the frame count keeps everything in one set.
    ls.to_deepmd_npy(deepmd, set_size=1000000)
    if len(ls2) == 0:
        print('test set has no data')
    elif args.savetest:
        # Write the test frames to a scratch folder as a single set, then
        # move that set next to the training set.
        ls2.to_deepmd_npy('test_tmp', set_size=1000000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')

コード例 #13

0

ファイルを表示

ファイル: post_check_force.py プロジェクト: neojie/mldp

# Scratch notes: print force extrema, load deepmd/npy data sets and rewrite
# one of them without frame 14.
# NOTE(review): `force1` (and `np` at this point) are not defined in the
# visible part of this file -- presumably set up earlier; confirm.
print(force1.min(axis=1))
print(min(force1.min(axis=1)), max(force1.max(axis=1)))
print(np.argmin(force1.min(axis=1)), np.argmax(force1.max(axis=1)))


#force=np.load('/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/generate_new_poscar/pairs/untitled folder/sisi/deepmd/set.000/force.npy')

#max_f = []
#for f in force:
#    max_f.append(np.max(np.max(f)))
#print(max_f)
#plt.plot(max_f)
# NOTE(review): `tmp` is assigned but not used in the visible code.
tmp  = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.project.ESS.lstixrud.jd848.pv+hf.dp-train.lmp_run.6k.rp5.160-cpu.pert.10k_good_p3.recal/deepmd_all'

tmp2 = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.home.j.jd848.project-lstixrud.metad.3rd.recal/deepmd/'
# NOTE(review): LabeledSystem is used here before the import just below.
ls = LabeledSystem(tmp2,fmt='deepmd/npy')

from dpdata import LabeledSystem
import numpy as np
# Reload a different data set, drop frame 14 and write the rest to 'test'.
ls = LabeledSystem('deepmd_ttr2',fmt='deepmd/npy')
idx = list(range(len(ls)))
idx.remove(14)
ls2 = ls.sub_system(idx)
ls2.to_deepmd_npy('test')
# fparam is read from set.001 of the source but saved into set.000 of the
# output, indexed with the same frame list.
# NOTE(review): confirm set.001 is the intended source set.
fparam = np.load('deepmd_ttr2/set.001/fparam.npy')
np.save('test/set.000/fparam.npy',fparam[idx])

# NOTE(review): the next two lines are shell commands, not Python; this file
# will not parse as a script while they are present.
cp -r deepmd_ttr2 deepmd_ttr2_tmp
rm -r deepmd_ttr2_tmp/set.000

from dpdata import LabeledSystem

コード例 #14

0

ファイルを表示

import sys
# sys.path entries must be directories (or zip archives): add the folder that
# contains Data.py rather than the file itself, otherwise
# `from Data import DataSets` cannot be resolved through this entry.
sys.path.append(
    '/Users/jiedeng/Documents/ml/deepmd-kit/my_example/codes')
from Data import DataSets
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import dpdata
# Load every OUTCAR below the run directory as a MultiSystems object.
vasp_multi_systems = dpdata.MultiSystems.from_dir(
    dir_name=
    '/Users/jiedeng/Documents/tmp/jd848/project_folder/pv+hf/3k/solid1/r3-3k/',
    file_name='OUTCAR',
    fmt='vasp/outcar')
from dpdata import LabeledSystem
# Load the single top-level OUTCAR of the same run.
ls = LabeledSystem(
    '/Users/jiedeng/Documents/tmp/jd848/project_folder/pv+hf/3k/solid1/r3-3k/OUTCAR',
    fmt='outcar')
print(ls.data['coords'].shape)  # (5000, 160, 3)

scaler = StandardScaler()
# Author's notes on the intended PCA:
# We have n samples and want the most important / representative
# coordinates, so that they can represent the rest of the features.
#   1) z-score the variables
#   2) eigendecomposition of the covariance matrix; the covariance matrix
#      should be n*n, not 480*480
#   3) sort the eigenvalues
#   4) project the original normalized data onto the PCA space
# With the protocol above the input should be `dat`; when using the PCA
# module directly the input should be `dat.T`.
### benchmark

コード例 #15

0

ファイルを表示

ファイル: single_sys.py プロジェクト: haidi-ustc/scripts

from dpdata import LabeledSystem,MultiSystems
from glob import glob

# Convert the OUTCAR in the current directory to deepmd raw/npy data.
# The original snippet printed an undefined `f` on failure and called
# methods on an undefined `ms`; both are defined here, and the system is
# only appended when it loaded and is not empty.
f = 'OUTCAR'
ms = MultiSystems()
try:
    ls = LabeledSystem(f)
except Exception:
    # Report the unreadable file (best effort, as in the original).
    print(f)
else:
    if len(ls) > 0:
        ms.append(ls)

ms.to_deepmd_raw('deepmd')
ms.to_deepmd_npy('deepmd')

コード例 #16

0

ファイルを表示

ファイル: dpdata_batch.py プロジェクト: neojie/mldp

from dpdata import System, LabeledSystem, MultiSystems
import os

# 'folders_to_merge' lists one folder per line; lines containing '#' and
# empty lines are ignored.
with open('folders_to_merge', 'r') as fp:
    folders_org = fp.readlines()

folders = [
    entry.replace('\n', '') for entry in folders_org
    if not ('#' in entry or entry == '\n')
]

for path in folders:
    # Work inside each folder, then return to where we started.
    pwd = os.getcwd()
    os.chdir(path)
    print("process ", path)
    labeled = LabeledSystem('OUTCAR', fmt='outcar')
    labeled.to_deepmd_raw('deepmd')
    labeled.to_deepmd_npy('deepmd', set_size=1000)
    os.chdir(pwd)
    print("done ", path)

print("done")

コード例 #17

0

ファイルを表示

ファイル: run.py プロジェクト: LiuGroupHNU/dpgen-1

def _parsing_vasp(paths, config_info_dict, id_prefix, iters=True):
    entries = []
    icount = 0
    if iters:
        iter_record = []
        iter_record_new = []
        try:
            with open("record.database", "r") as f_record:
                iter_record = [i.split()[0] for i in f_record.readlines()]
            iter_record.sort()
            dlog.info("iter_record")
            dlog.info(iter_record)
        except:
            pass
    for path in paths:
        try:
            f_outcar = os.path.join(path, 'OUTCAR')
            f_job = os.path.join(path, 'job.json')
            tmp_iter = path.split('/')[-3]
            if (tmp_iter in iter_record) and (tmp_iter != iter_record[-1]):
                continue
            if tmp_iter not in iter_record_new:
                iter_record_new.append(tmp_iter)
            vi = VaspInput.from_directory(path)
            if os.path.isfile(f_job):
                attrib = loadfn(f_job)
            else:
                attrib = {}

            if iters and attrib:
                # generator/Cu/iter.000031/02.fp/task.007.000000
                tmp_ = path.split('/')[-1]
                #config_info=tmp_.split('.')[1]
                task_info = tmp_.split('.')[-1]
                tmp_iter = path.split('/')[-3]
                iter_info = tmp_iter.split('.')[-1]
                sys_info = path.split('/')[-4]
                config_info_int = int(tmp_.split('.')[1])
                for (key, value) in config_info_dict.items():
                    if config_info_int in value:
                        config_info = key
                attrib['config_info'] = config_info
                attrib['task_info'] = task_info
                attrib['iter_info'] = iter_info
                attrib['sys_info'] = sys_info
                with open(f_outcar, "r") as fin_outcar:
                    infile_outcar = fin_outcar.readlines()
                for line in infile_outcar:
                    if "running on" in line:
                        attrib["core"] = int(line.split()[2])
                    if "Elapse" in line:
                        attrib["wall_time"] = float(line.split()[-1])
                    if "executed on" in line:
                        attrib["date"] = line.split()[-2]
                        attrib["clocktime"] = line.split()[-1]
                dlog.info("Attrib")
                dlog.info(attrib)
            comp = vi['POSCAR'].structure.composition
            ls = LabeledSystem(f_outcar)
            lss = ls.to_list()
            for ls in lss:
                if id_prefix:
                    eid = id_prefix + "_" + str(icount)
                else:
                    eid = str(uuid4())
            entry = Entry(comp,
                          'vasp',
                          vi.as_dict(),
                          ls.as_dict(),
                          attribute=attrib,
                          entry_id=eid)
            entries.append(entry)
            icount += 1
        except Exception:
            #dlog.info(str(Exception))
            dlog.info("failed for %s" % (path))
            #pass
    if iters:
        iter_record.sort()
        iter_record_new.sort()
        with open("record.database", "w") as fw:
            for line in iter_record:
                fw.write(line + "\n")
            for line in iter_record_new:
                fw.write(line + "\n")
    return entries

コード例 #18

0

ファイルを表示

from dpdata import System, LabeledSystem, MultiSystems

# Convert the OUTCAR in the current directory to deepmd raw files, written
# into the current directory.
# Earlier experiments, kept for reference:
#   - load POSCAR as a System (fmt='poscar') and print it
#   - also write deepmd/npy to 'deepmd' with a set size of half the frames,
#     rounded up
labeled_system = LabeledSystem('OUTCAR', fmt='outcar')
labeled_system.to_deepmd_raw('.')

コード例 #19

0

ファイルを表示

from dpdata import LabeledSystem, MultiSystems
from glob import glob

# Collect every readable, non-empty OUTCAR into one MultiSystems object and
# write it out as deepmd raw and npy data.
fs = glob('./*/[0-9]*/OUTCAR')
ms = MultiSystems()
for f in fs:
    try:
        ls = LabeledSystem(f)
    except Exception:
        # Report the unreadable file and skip it. Without `continue` the
        # code below would reuse the previous system (or hit an undefined
        # `ls` if the first file fails).
        print(f)
        continue
    if len(ls) > 0:
        ms.append(ls)

ms.to_deepmd_raw('deepmd')
ms.to_deepmd_npy('deepmd')

コード例 #20

0

ファイルを表示

from dpdata import LabeledSystem, MultiSystems
from glob import glob
from tqdm import tqdm

# Collect OUTCARs into one MultiSystems object, keeping only systems whose
# first structure leaves at least 14 (lattice c minus the z extent of the
# atoms) and whose first frame has no force component above `maxf`.
fs = glob('iter.0000[3-5]*/02.fp/task*/OUTCAR')
maxf = 1.0
ms = MultiSystems()
ic = 0
for f in tqdm(fs):
    try:
        ls = LabeledSystem(f)
    except Exception:
        # Report the unreadable file and skip it. Without `continue` the
        # code below would reuse the previous system (or hit an undefined
        # `ls` if the first file fails).
        print(f)
        continue
    if len(ls) == 0:
        continue
    st = ls.to_pymatgen_structure()[0]
    z = st.cart_coords[:, 2]
    if st.lattice.c - (z.max() - z.min()) < 14:
        continue
    if ls.sub_system([0]).data['forces'].max() > maxf:
        continue
    ic += 1
    ms.append(ls)

print(len(fs))
print(ic)
ms.to_deepmd_raw('deepmd-f%s' % maxf)
ms.to_deepmd_npy('deepmd-f%s' % maxf)

コード例 #21

0

ファイルを表示

from glob import glob
from dpdata import LabeledSystem
from monty.serialization import dumpfn, loadfn
from tqdm import tqdm

# For every OUTCAR, convert its last frame to a pymatgen
# ComputedStructureEntry and dump all entries to one JSON file.
fs = glob('usefull-[1-3]/sys-*/OUTCAR')
entries = []
for f in tqdm(fs):
    ls = LabeledSystem(f)
    # Convert once; the original ran the same conversion twice and threw
    # the first result away.
    entry = ls.sub_system([-1]).to_pymatgen_ComputedStructureEntry()[0]
    entries.append(entry)
dumpfn(entries, 'all-vasp-entries.json')