Example #1
    def testEntry(self):
        entries = []
        for i, f in enumerate(self.iter_path):
            vi = VaspInput.from_directory(f)
            ls = LabeledSystem(os.path.join(f, 'OUTCAR'))
            attrib = loadfn(os.path.join(f, 'job.json'))
            comp = vi['POSCAR'].structure.composition
            entry = Entry(comp,
                          'vasp',
                          vi.as_dict(),
                          ls.as_dict(),
                          entry_id='pku-' + str(i),
                          attribute=attrib)
            entries.append(entry)
        self.assertEqual(len(entries), len(self.ref_entries))
        ret0 = entries[0]
        r0 = self.ref_entries[0]
        self.assertEqual(Incar.from_dict(ret0.inputs['INCAR']),
                         Incar.from_dict(r0.inputs['INCAR']))
        self.assertEqual(str(r0.inputs['KPOINTS']),
                         str(Kpoints.from_dict(ret0.inputs['KPOINTS'])))

        self.assertEqual(ret0.inputs['POTCAR'], r0.inputs['POTCAR'].as_dict())
        self.assertEqual(
            Poscar.from_dict(ret0.inputs['POSCAR']).structure,
            r0.inputs['POSCAR'].structure)
        self.assertEqual(ret0.entry_id, 'pku-0')
Example #2
    def compute(self, output_dir):
        outcar = os.path.join(output_dir, 'OUTCAR')
        if not os.path.isfile(outcar):
            dlog.warning("cannot find OUTCAR in " + output_dir + " skip")
            return None
        else:
            ls = LabeledSystem(outcar)
            stress = []
            with open(outcar, 'r') as fin:
                lines = fin.read().split('\n')
            for line in lines:
                if 'in kB' in line:
                    # OUTCAR lists the stress components in the order XX YY ZZ XY YZ ZX
                    sxx, syy, szz, sxy, syz, szx = (float(x) for x in line.split()[2:8])
                    stress.append([[sxx, sxy, szx],
                                   [sxy, syy, syz],
                                   [szx, syz, szz]])

            outcar_dict = ls.as_dict()
            outcar_dict['data']['stress'] = {
                "@module": "numpy",
                "@class": "array",
                "dtype": "float64",
                "data": stress
            }

            return outcar_dict
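The stress block is stored as a plain nested list inside an MSON-style dictionary. A minimal sketch (assuming numpy is available, and using outcar_dict for the value returned by compute() above) of turning it back into an array:

import numpy as np

stress = np.array(outcar_dict['data']['stress']['data'], dtype='float64')
print(stress.shape)  # (nframes, 3, 3): one 3x3 stress tensor per ionic step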
Example #3
def build_deepmd(path, nsw):
    ls = LabeledSystem(os.path.join(path, 'outcar'), fmt='outcar')
    deepmd = os.path.join(path, 'deepmd')
    if nsw <= 4:  # in practice nsw should be > 100
        set_size = 1
        print("{0} has only {1} frames".format(path, nsw))
    if nsw > 4:
        set_size = nsw // 4  # ~25% of the frames per set; e.g. nsw=82 gives sets of 20, 20, 20, 2, and the last set is too small
    ls.to_deepmd_npy(deepmd, set_size=set_size)
Example #4
def build_deepmd(path, nsw):
    ls = LabeledSystem(os.path.join(path, 'OUTCAR'), fmt='outcar')
    deepmd = os.path.join(path, 'deepmd')
    if nsw <= 2000:  # in practice nsw should be > 100
        set_size = nsw // 2
    if nsw > 2000:
        set_size = 1000
    ls.to_deepmd_npy(deepmd, set_size=set_size)
    if nsw > 3000:
        check_sets(deepmd)
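check_sets is not defined in this snippet; a minimal sketch of such a sanity check, assuming the usual deepmd-npy layout written by to_deepmd_npy (deepmd/set.000/coord.npy, ...), might look like:

import glob
import os
import numpy as np

def check_sets(deepmd):
    # hypothetical helper: report how many frames ended up in each set directory
    for s in sorted(glob.glob(os.path.join(deepmd, 'set.*'))):
        nframes = np.load(os.path.join(s, 'coord.npy')).shape[0]
        print(s, nframes, 'frames')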
Example #5
 def load(cls, filename, Cls=None):
     with open(filename, 'r') as f:
         fc = f.read()
     jc = json.loads(fc)
     composition = jc['composition']
     calculator = jc['calculator']
     if calculator.lower() == 'vasp':
         try:
             inputs = VaspInput.from_dict(jc['inputs']).as_dict()
         except:
             inputs = jc['inputs']
             warnings.warn("""Improperly configured POTCAR!
                             The returned instance cannot be used
                             as input for the from_dict() method.""")
     else:
         if Cls:
             inputs = Cls.from_dict(jc['inputs']).as_dict()
         else:
             raise RuntimeError("inputs decoder must be given")
     data = LabeledSystem.from_dict(jc['data']).as_dict()
     attribute = jc['attribute']
     entry_id = jc['entry_id']
     tag = jc['tag']
     return cls(composition, calculator, inputs, data, entry_id, attribute,
                tag)
Example #6
File: list.py Project: njzjz/dpdata
 def to_system(self, data, **kwargs):
     """
     convert system to list, usefull for data collection
     """
     from dpdata import System, LabeledSystem
     if 'forces' in data:
         system = LabeledSystem(data=data)
     else:
         system = System(data=data)
     if len(system) == 0:
         return []
     if len(system) == 1:
         return [system]
     else:
         systems = []
         for ii in range(len(system)):
             systems.append(system.sub_system([ii]))
         return systems
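This plugin is what backs the to_list() calls seen in Examples #8 and #17; a minimal usage sketch, assuming an OUTCAR in the working directory:

from dpdata import LabeledSystem

ls = LabeledSystem('OUTCAR', fmt='vasp/outcar')
frames = ls.to_list()          # one single-frame LabeledSystem per ionic step
print(len(ls), len(frames))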
Example #7
def test():
    from monty.serialization import dumpfn, loadfn
    from monty.json import MontyDecoder, MontyEncoder
    from pymatgen.io.vasp.inputs import PotcarSingle, Potcar
    vi = VaspInput.from_directory('.')
    ls = LabeledSystem('OUTCAR', fmt='vasp/outcar')
    en0 = Entry('Al',
                'vasp',
                inputs=vi.as_dict(),
                data=ls.as_dict(),
                entry_id='pku-1')
    print(en0)
    fname = 'pku-1.json'
    dumpfn(en0.as_dict(), fname, indent=4)
    en1 = Entry.load(fname)
    #vin=VaspInput.from_dict(en1.inputs)
    #vin.write_input('./new')
    print(en1)
    print(en1.as_dict())
Example #8
def _parsing_vasp(paths, id_prefix, iters=True):
    entries = []
    icount = 0
    for path in paths:
        f_outcar = os.path.join(path, 'OUTCAR')
        f_job = os.path.join(path, 'job.json')

        try:
            vi = VaspInput.from_directory(path)
            if os.path.isfile(f_job):
                attrib = loadfn(f_job)
            else:
                attrib = {}

            if iters and attrib:
                tmp_ = path.split('/')[-1]
                iter_info = tmp_.split('.')[1]
                task_info = tmp_.split('.')[-1]
                attrib['iter_info'] = iter_info
                attrib['task_info'] = task_info
            else:
                pass
            comp = vi['POSCAR'].structure.composition
            ls = LabeledSystem(f_outcar)
            lss = ls.to_list()
            for ls in lss:
                if id_prefix:
                    eid = id_prefix + "_" + str(icount)
                else:
                    eid = str(uuid4())
                entry = Entry(comp,
                              'vasp',
                              vi.as_dict(),
                              ls.as_dict(),
                              attribute=attrib,
                              entry_id=eid)
                entries.append(entry)
                icount += 1
        except:
            dlog.info("failed here : %s" % path)
    return entries
Example #9
def extract_outcar(outcar):
    """
    extract e, f, v
    """
    ### get configs that were recalculated
    ls = LabeledSystem(outcar, fmt='outcar')
    fp = open(outcar)
    fp.readline()
    nsw_sel = fp.readline()
    if 'nsw_sel' in nsw_sel:
        print('file generated by merge_out.py')
        tmp = nsw_sel.split('=')[1].strip().split(' ')
        nsw_sel = [int(tmp_idx) for tmp_idx in tmp]
    ### get configs that were recalculated
    etot = ls['energies']
    nsw = np.array(nsw_sel).astype(int) - 1  # relative nsw, starting from 0
    stress = ls['virials']
    forces = ls['forces']
    return etot, stress, forces, nsw
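For reference, a minimal sketch of the header parsing above on a made-up nsw_sel line (the exact wording written by merge_out.py is an assumption; only the 'nsw_sel = ...' format is implied by the code):

nsw_line = 'nsw_sel = 1 5 9 13'   # hypothetical second line of a merged OUTCAR
frames = [int(i) for i in nsw_line.split('=')[1].strip().split(' ')]
print(frames)   # [1, 5, 9, 13]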
Example #10
def build_deepmd(path, nsw, outcar, deepmd):
    ls = LabeledSystem(outcar, fmt='outcar')
    """
    sub_ls = ls.sub_system(idx)
    
    """
    if args.idx:
        print("index file provided")
        idx = np.loadtxt(args.idx).astype(int)


#        ls = ls.sub_system(idx)
    if args.vaspidx:
        print("vasp index file provided")
        vaspidx = np.loadtxt(args.vaspidx)
        fp = open(outcar)
        fp.readline()
        nsw_sel = fp.readline()
        if 'nsw_sel' in nsw_sel:
            print('file generated by merge_out.py')
            #    print(nsw_sel)
            tmp = nsw_sel.split('=')[1].strip().split(' ')
            nsw_sel = [int(tmp_idx) for tmp_idx in tmp]
            idx = []
            for i in range(len(nsw_sel)):
                if nsw_sel[i] in vaspidx:
                    idx.append(i)
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    idx2 = [i for i in range(len(ls)) if i not in idx]
    ls2 = ls.sub_system(idx2)
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)
    if args.batchsize:
        set_size = args.batchsize
    else:
        if nsw <= 4:  # in practice nsw should be > 100
            set_size = 1
            print("{0} has only {1} frames".format(path, nsw))
        if nsw > 4:
            # ~25% of the frames per set; e.g. nsw=82 would give sets of 20, 20, 20, 2, and the last set is too small
            set_size, _ = best_size(nsw)
    ls.to_deepmd_npy(deepmd, set_size=set_size)
    if args.test:
        ls2.to_deepmd_npy('test_tmp', set_size=100000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')
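best_size is not defined in this snippet; a rough sketch of what such a helper might do, following the comment above (about a quarter of the frames per set while avoiding a tiny leftover set), could be:

def best_size(nsw, nsets=4):
    # hypothetical helper: split nsw frames into roughly `nsets` sets,
    # using ceiling division so the last set is not left with only a few frames
    set_size = -(-nsw // nsets)   # e.g. nsw=82 -> 21, giving sets of 21, 21, 21, 19
    return set_size, nsw % set_size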
Example #11
from dpdata import LabeledSystem, MultiSystems
from glob import glob
from tqdm import tqdm
"""
process multi systems
"""
fs = glob('iter.0000[4-7]*/02.fp/task*/OUTCAR')
maxf = 3.0
ms = MultiSystems()
ic = 0
vacuum_size = 13
for f in tqdm(fs):
    if check_cluster(f.replace('OUTCAR', 'POSCAR'), vacuum_size, fmt='POSCAR'):
        print(f)
        continue
    try:
        ls = LabeledSystem(f)
    except:
        print(f)
        continue
    if len(ls) > 0:
        if ls.sub_system([0]).data['forces'].max() > maxf:
            pass
        else:
            ic += 1
            ms.append(ls)

print(len(fs))
print(ic)
ms.to_deepmd_raw('deepmd-f%s' % maxf)
ms.to_deepmd_npy('deepmd-f%s' % maxf)
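check_cluster is also not defined in this snippet; a minimal sketch of such a vacuum test, modeled on the inline check used in Example #20 below (the pymatgen import and the >= convention are assumptions), might be:

from pymatgen.core import Structure

def check_cluster(poscar, vacuum_size, fmt='POSCAR'):
    # hypothetical helper: True when there is at least `vacuum_size` of empty
    # space along the c axis (fmt is kept only for signature compatibility)
    st = Structure.from_file(poscar)
    z = st.cart_coords[:, 2]
    return st.lattice.c - (z.max() - z.min()) >= vacuum_size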
Example #12
def build_deepmd_frames(path, outcar, deepmd):
    """
    sub_ls = ls.sub_system(idx)
    
    """
    try:
        ls = LabeledSystem(outcar, fmt=args.format)
    except:
        ls = System(outcar, fmt=args.format)

    if args.exclude:
        oldsize = len(ls)
        idx_new = [i for i in range(len(ls)) if i not in args.exclude]
        ls = ls.sub_system(idx_new)
        newsize = len(ls)
        print('{0}/{1} is selected'.format(newsize, oldsize))

    if args.force_limit:
        fmin = min(args.force_limit)
        fmax = max(args.force_limit)
        print("force limit imposed, force in between {0}, {1}".format(
            fmin, fmax))
        idx_new = []
        exclude = []
        for i in range(len(ls)):
            forces = ls[i].data['forces']
            if forces.min() >= fmin and forces.max() <= fmax:
                idx_new.append(i)
            else:
                exclude.append(i)
        print('excluded frames', exclude)
        print('{0} / {1} is selected'.format(len(idx_new), len(ls)))
        ls = ls.sub_system(idx_new)

    if args.idx:
        print("index file provided")
        idx = np.loadtxt(args.idx).astype(int)
    elif (not args.idx) and args.vaspidx:
        print("vasp index file provided")
        vaspidx = np.loadtxt(args.vaspidx)
        fp = open(outcar)
        fp.readline()
        nsw_sel = fp.readline()
        if 'nsw_sel' in nsw_sel:
            print('file generated by merge_out.py')
            tmp = nsw_sel.split('=')[1].strip().split(' ')
            nsw_sel = [int(tmp_idx) for tmp_idx in tmp]
            idx = []
            for i in range(len(nsw_sel)):
                if nsw_sel[i] in vaspidx:
                    idx.append(i)
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    else:
        print("split train and test by ratio {0} : {1}".format(
            args.train_test_ratio, 1))
        train_size = round(
            len(ls) * (args.train_test_ratio) / (args.train_test_ratio + 1))
        idx = np.random.choice(range(len(ls)), train_size, replace=False)
        idx.sort()

    idx2 = [i for i in range(len(ls)) if i not in idx]  # test
    ls2 = ls.sub_system(idx2)  # test
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)

    ls.to_deepmd_npy(deepmd,
                     set_size=1000000)  # give a *large* value, default is 5000
    if len(ls2) == 0:
        print('test set has no data')
    elif args.savetest and len(ls2) > 0:
        ls2.to_deepmd_npy('test_tmp', set_size=1000000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')
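For example, with args.train_test_ratio = 4 and 100 frames, train_size = round(100 * 4 / (4 + 1)) = 80, so 80 randomly chosen frames end up in the deepmd training set and the remaining 20 are written to set.001 when args.savetest is set.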
Example #13
print(force1.min(axis=1))
print(min(force1.min(axis=1)), max(force1.max(axis=1)))
print(np.argmin(force1.min(axis=1)), np.argmax(force1.max(axis=1)))


#force=np.load('/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/generate_new_poscar/pairs/untitled folder/sisi/deepmd/set.000/force.npy')

#max_f = []
#for f in force:
#    max_f.append(np.max(np.max(f)))
#print(max_f)    
#plt.plot(max_f)
tmp  = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.project.ESS.lstixrud.jd848.pv+hf.dp-train.lmp_run.6k.rp5.160-cpu.pert.10k_good_p3.recal/deepmd_all'

tmp2 = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.home.j.jd848.project-lstixrud.metad.3rd.recal/deepmd/'
ls = LabeledSystem(tmp2,fmt='deepmd/npy')

from dpdata import LabeledSystem
import numpy as np
ls = LabeledSystem('deepmd_ttr2',fmt='deepmd/npy')
idx = list(range(len(ls)))
idx.remove(14)
ls2 = ls.sub_system(idx)
ls2.to_deepmd_npy('test')
fparam = np.load('deepmd_ttr2/set.001/fparam.npy')
np.save('test/set.000/fparam.npy',fparam[idx])

# shell commands (run outside Python):
#   cp -r deepmd_ttr2 deepmd_ttr2_tmp
#   rm -r deepmd_ttr2_tmp/set.000

from dpdata import LabeledSystem
Example #14
import sys
sys.path.append(
    '/Users/jiedeng/Documents/ml/deepmd-kit/my_example/codes')  # directory that contains Data.py
from Data import DataSets
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import dpdata
vasp_multi_systems = dpdata.MultiSystems.from_dir(
    dir_name=
    '/Users/jiedeng/Documents/tmp/jd848/project_folder/pv+hf/3k/solid1/r3-3k/',
    file_name='OUTCAR',
    fmt='vasp/outcar')
from dpdata import LabeledSystem
ls = LabeledSystem(
    '/Users/jiedeng/Documents/tmp/jd848/project_folder/pv+hf/3k/solid1/r3-3k/OUTCAR',
    fmt='outcar')
print(ls.data['coords'].shape)  # (5000, 160, 3)

scaler = StandardScaler()
"""
We have n samples and need to find the most important or representative coordinates so that
they can represent the rest of the features.
1) z-score the variables
2) eigendecomposition of the covariance matrix; the covariance matrix should be n*n, not 480*480
3) sort the eigenvalues
4) project the original normalized data onto the PCA space
If we follow the protocol above, the input is dat,
but if we use the PCA module directly, the input should be dat.T
"""
### benchmark
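A minimal sketch of one way to apply the scaler and PCA imported above to the flattened coordinates, following the protocol in the docstring (the number of components and the choice between dat and dat.T are left open by the original text):

nframes = ls.data['coords'].shape[0]
dat = ls.data['coords'].reshape(nframes, -1)   # (5000, 480): one flattened coordinate vector per frame
dat_std = scaler.fit_transform(dat)            # step 1: z-score every coordinate component

pca = PCA(n_components=10)                     # steps 2-4: eigendecomposition, sorting, projection
proj = pca.fit_transform(dat_std)              # per the docstring, pass dat_std.T instead when a
                                               # frames-by-frames covariance is wanted from the PCA module
print(pca.explained_variance_ratio_)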
Example #15
from dpdata import LabeledSystem, MultiSystems
from glob import glob
"""
process multi systems
"""
ms = MultiSystems()
try:
    ls = LabeledSystem('OUTCAR')
    ms.append(ls)
except Exception:
    print('failed to parse OUTCAR')

ms.to_deepmd_raw('deepmd')
ms.to_deepmd_npy('deepmd')
Example #16
from dpdata import System, LabeledSystem, MultiSystems
import os

fp = open('folders_to_merge', 'r')
folders_org = fp.readlines()

folders = []
fp.close()

for i in range(len(folders_org)):
    if '#' in folders_org[i] or folders_org[i] == '\n':
        pass
    else:
        folders.append(folders_org[i].replace('\n', ''))

for path in folders:
    pwd = os.getcwd()
    os.chdir(path)
    print("process ", path)
    #s=System('POSCAR',fmt='poscar')
    ls = LabeledSystem('OUTCAR', fmt='outcar')
    ls.to_deepmd_raw('deepmd')
    ls.to_deepmd_npy('deepmd', set_size=1000)
    os.chdir(pwd)
    print("done ", path)

print("done")
Example #17
def _parsing_vasp(paths, config_info_dict, id_prefix, iters=True):
    entries = []
    icount = 0
    if iters:
        iter_record = []
        iter_record_new = []
        try:
            with open("record.database", "r") as f_record:
                iter_record = [i.split()[0] for i in f_record.readlines()]
            iter_record.sort()
            dlog.info("iter_record")
            dlog.info(iter_record)
        except:
            pass
    for path in paths:
        try:
            f_outcar = os.path.join(path, 'OUTCAR')
            f_job = os.path.join(path, 'job.json')
            tmp_iter = path.split('/')[-3]
            if (tmp_iter in iter_record) and (tmp_iter != iter_record[-1]):
                continue
            if tmp_iter not in iter_record_new:
                iter_record_new.append(tmp_iter)
            vi = VaspInput.from_directory(path)
            if os.path.isfile(f_job):
                attrib = loadfn(f_job)
            else:
                attrib = {}

            if iters and attrib:
                # generator/Cu/iter.000031/02.fp/task.007.000000
                tmp_ = path.split('/')[-1]
                #config_info=tmp_.split('.')[1]
                task_info = tmp_.split('.')[-1]
                tmp_iter = path.split('/')[-3]
                iter_info = tmp_iter.split('.')[-1]
                sys_info = path.split('/')[-4]
                config_info_int = int(tmp_.split('.')[1])
                for (key, value) in config_info_dict.items():
                    if config_info_int in value:
                        config_info = key
                attrib['config_info'] = config_info
                attrib['task_info'] = task_info
                attrib['iter_info'] = iter_info
                attrib['sys_info'] = sys_info
                with open(f_outcar, "r") as fin_outcar:
                    infile_outcar = fin_outcar.readlines()
                for line in infile_outcar:
                    if "running on" in line:
                        attrib["core"] = int(line.split()[2])
                    if "Elapse" in line:
                        attrib["wall_time"] = float(line.split()[-1])
                    if "executed on" in line:
                        attrib["date"] = line.split()[-2]
                        attrib["clocktime"] = line.split()[-1]
                dlog.info("Attrib")
                dlog.info(attrib)
            comp = vi['POSCAR'].structure.composition
            ls = LabeledSystem(f_outcar)
            lss = ls.to_list()
            for ls in lss:
                if id_prefix:
                    eid = id_prefix + "_" + str(icount)
                else:
                    eid = str(uuid4())
                entry = Entry(comp,
                              'vasp',
                              vi.as_dict(),
                              ls.as_dict(),
                              attribute=attrib,
                              entry_id=eid)
                entries.append(entry)
                icount += 1
        except Exception:
            #dlog.info(str(Exception))
            dlog.info("failed for %s" % (path))
            #pass
    if iters:
        iter_record.sort()
        iter_record_new.sort()
        with open("record.database", "w") as fw:
            for line in iter_record:
                fw.write(line + "\n")
            for line in iter_record_new:
                fw.write(line + "\n")
    return entries
Example #18
from dpdata import System, LabeledSystem, MultiSystems

#s = System('POSCAR', fmt='poscar')
#print(s)
ls = LabeledSystem('OUTCAR', fmt='outcar')
"""
if len(ls)%2==0:
    size = int(len(ls)/2)
else:
    size = int(len(ls)/2) + 1
"""
ls.to_deepmd_raw('.')
#ls.to_deepmd_npy('deepmd', set_size=size)
Example #19
from dpdata import LabeledSystem, MultiSystems
from glob import glob
"""
process multi systems
"""
fs = glob('./*/[0-9]*/OUTCAR')
ms = MultiSystems()
for f in fs:
    try:
        ls = LabeledSystem(f)
    except Exception:
        print(f)
        continue
    if len(ls) > 0:
        ms.append(ls)

ms.to_deepmd_raw('deepmd')
ms.to_deepmd_npy('deepmd')
Example #20
from dpdata import LabeledSystem, MultiSystems
from glob import glob
from tqdm import tqdm
"""
process multi systems
"""
fs = glob('iter.0000[3-5]*/02.fp/task*/OUTCAR')
maxf = 1.0
ms = MultiSystems()
ic = 0
for f in tqdm(fs):
    try:
        ls = LabeledSystem(f)
    except Exception:
        print(f)
        continue
    if len(ls) > 0:
        st = ls.to_pymatgen_structure()[0]
        z = st.cart_coords[:, 2]
        if st.lattice.c - (z.max() - z.min()) < 14:
            pass
        else:
            if ls.sub_system([0]).data['forces'].max() > maxf:
                pass
            else:
                ic += 1
                ms.append(ls)

print(len(fs))
print(ic)
ms.to_deepmd_raw('deepmd-f%s' % maxf)
ms.to_deepmd_npy('deepmd-f%s' % maxf)
Example #21
from glob import glob
from dpdata import LabeledSystem
from monty.serialization import dumpfn, loadfn
from tqdm import tqdm

fs = glob('usefull-[1-3]/sys-*/OUTCAR')
entries = []
for f in tqdm(fs):
    ls = LabeledSystem(f)
    entry = ls.sub_system([-1]).to_pymatgen_ComputedStructureEntry()[0]
    entries.append(entry)
dumpfn(entries, 'all-vasp-entries.json')