def build_deepmd(path, nsw, outcar, deepmd):
    """Write the selected frames of ``outcar`` as a deepmd/npy data set.

    Frame selection is driven by the module-level ``args``:

    * ``args.idx``: text file of frame indices, passed as-is to
      ``sub_system``.
    * ``args.vaspidx``: text file of VASP step numbers.  If the second
      line of ``outcar`` contains ``nsw_sel`` (the merge_out.py layout),
      the step numbers listed after ``=`` on that line are matched
      against the file; otherwise the numbers are treated as 1-based and
      shifted by one.  When both options are given this one wins.
    * neither: every frame is selected (previously this path failed with
      a ``NameError``).

    Args:
        path: Directory the output folder is created under.
        nsw: Number of steps; used to choose ``set_size`` when
            ``args.batchsize`` is not set.
        outcar: Path of the OUTCAR file to read.
        deepmd: Output folder name, joined onto ``path``.

    When ``args.test`` is set, the unselected frames are dumped to a
    temporary ``test_tmp`` folder whose ``set.000`` is copied to
    ``<deepmd>/set.001``.
    """
    ls = LabeledSystem(outcar, fmt='outcar')

    # Default selection when no index file is supplied.
    idx = np.arange(len(ls))

    if args.idx:
        print("index file provided")
        # atleast_1d: a single-entry file loads as a 0-d array.
        idx = np.atleast_1d(np.loadtxt(args.idx)).astype(int)

    if args.vaspidx:
        print("vasp index file provided")
        # loadtxt yields floats; cast so the result can index frames.
        vaspidx = np.atleast_1d(np.loadtxt(args.vaspidx)).astype(int)
        with open(outcar) as fp:
            fp.readline()
            header = fp.readline()
        if 'nsw_sel' in header:
            print('file generated by merge_out.py')
            # split() (no argument) tolerates repeated blanks.
            nsw_sel = [int(tok) for tok in header.split('=')[1].split()]
            wanted = set(vaspidx.ravel().tolist())
            idx = [pos for pos, step in enumerate(nsw_sel) if step in wanted]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1

    # Complement of the selection, kept for the optional test set.
    selected = set(np.asarray(idx).ravel().tolist())
    idx2 = [i for i in range(len(ls)) if i not in selected]
    ls2 = ls.sub_system(idx2)
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)
    if args.batchsize:
        set_size = args.batchsize
    elif nsw <= 4:
        # Original note: nsw is expected to be > 100, so this is unusual.
        set_size = 1
        print("{0} has only {1}".format(path, nsw))
    else:
        # Original note: best_size avoids a tiny trailing set
        # (e.g. 82 frames split as 20, 20, 20, 2).
        set_size, _ = best_size(nsw)

    ls.to_deepmd_npy(deepmd, set_size=set_size)

    if args.test:
        ls2.to_deepmd_npy('test_tmp', set_size=100000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')
def to_system(self, data, **kwargs):
    """Convert system data to a list of systems, useful for data collection.

    Builds a ``LabeledSystem`` when ``data`` has a ``'forces'`` key and a
    ``System`` otherwise.  Returns ``[]`` for an empty system, the system
    itself wrapped in a list when it has one frame, and one single-frame
    sub-system per frame otherwise.
    """
    from dpdata import System, LabeledSystem

    system_cls = LabeledSystem if 'forces' in data else System
    whole = system_cls(data=data)

    if len(whole) == 0:
        return []
    if len(whole) == 1:
        return [whole]
    return [whole.sub_system([frame]) for frame in range(len(whole))]
def build_deepmd_frames(path, outcar, deepmd):
    """Filter the frames of ``outcar`` and write them as deepmd/npy data.

    Behaviour is driven by the module-level ``args``:

    * ``args.format``: dpdata format used to read ``outcar``.  A
      ``LabeledSystem`` is tried first, with ``System`` as fallback.
    * ``args.exclude``: frame indices to drop.
    * ``args.force_limit``: only frames whose force components all lie
      within ``[min(force_limit), max(force_limit)]`` are kept.
    * ``args.idx``: text file of training-frame indices.
    * ``args.vaspidx`` (only when ``args.idx`` is unset): text file of
      VASP step numbers.  If the second line of ``outcar`` contains
      ``nsw_sel`` (the merge_out.py layout), they are matched against the
      step numbers listed after ``=`` on that line; otherwise they are
      treated as 1-based and shifted by one.
    * otherwise: a random train/test split of ratio
      ``args.train_test_ratio : 1``.
    * ``args.savetest``: when set and the test set is not empty, the test
      frames are written to ``<deepmd>/set.001``.

    Args:
        path: Directory the output folder is created under.
        outcar: Path of the file to read.
        deepmd: Output folder name, joined onto ``path``.
    """
    try:
        ls = LabeledSystem(outcar, fmt=args.format)
    except Exception:
        # Narrowed from a bare except so Ctrl-C / SystemExit propagate.
        ls = System(outcar, fmt=args.format)

    if args.exclude:
        oldsize = len(ls)
        dropped = set(args.exclude)
        idx_new = [i for i in range(len(ls)) if i not in dropped]
        ls = ls.sub_system(idx_new)
        newsize = len(ls)
        print('{0}/{1} is selected'.format(newsize, oldsize))

    if args.force_limit:
        fmin = min(args.force_limit)
        fmax = max(args.force_limit)
        print("force limit imposed, force in between {0}, {1}".format(
            fmin, fmax))
        idx_new = []
        exclude = []
        for i in range(len(ls)):
            forces = ls[i].data['forces']
            if forces.min() >= fmin and forces.max() <= fmax:
                idx_new.append(i)
            else:
                exclude.append(i)
        print('excluded frames', exclude)
        print('{0} / {1} is selected'.format(len(idx_new), len(ls)))
        ls = ls.sub_system(idx_new)

    if args.idx:
        print("index file provided")
        # atleast_1d: a single-entry file loads as a 0-d array.
        idx = np.atleast_1d(np.loadtxt(args.idx)).astype(int)
    elif args.vaspidx:
        print("vasp index file provided")
        # loadtxt yields floats; cast so the result can index frames.
        vaspidx = np.atleast_1d(np.loadtxt(args.vaspidx)).astype(int)
        with open(outcar) as fp:
            fp.readline()
            header = fp.readline()
        if 'nsw_sel' in header:
            print('file generated by merge_out.py')
            # split() (no argument) tolerates repeated blanks.
            nsw_sel = [int(tok) for tok in header.split('=')[1].split()]
            wanted = set(vaspidx.ravel().tolist())
            idx = [pos for pos, step in enumerate(nsw_sel) if step in wanted]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    else:
        print("split train and test by ratio {0} : {1}".format(
            args.train_test_ratio, 1))
        train_size = round(
            len(ls) * (args.train_test_ratio) / (args.train_test_ratio + 1))
        idx = np.random.choice(range(len(ls)), train_size, replace=False)
        idx.sort()

    # Everything not selected for training becomes the test set.
    selected = set(np.asarray(idx).ravel().tolist())
    idx2 = [i for i in range(len(ls)) if i not in selected]
    ls2 = ls.sub_system(idx2)
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)
    # Original note: give a *large* set_size, the default is 5000.
    ls.to_deepmd_npy(deepmd, set_size=1000000)

    if len(ls2) == 0:
        print('test set has no data')
    elif args.savetest:
        ls2.to_deepmd_npy('test_tmp', set_size=1000000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')
# Scratch notes: drop one frame from a deepmd/npy data set and keep the
# matching fparam rows.  Imports are hoisted above first use, and the two
# shell steps that were pasted in verbatim are expressed with shutil so the
# file parses as Python.
import shutil

import numpy as np
from dpdata import LabeledSystem

#max_f = []
#for f in force:
#    max_f.append(np.max(np.max(f)))
#print(max_f)
#plt.plot(max_f)

tmp = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.project.ESS.lstixrud.jd848.pv+hf.dp-train.lmp_run.6k.rp5.160-cpu.pert.10k_good_p3.recal/deepmd_all'
tmp2 = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.home.j.jd848.project-lstixrud.metad.3rd.recal/deepmd/'
ls = LabeledSystem(tmp2, fmt='deepmd/npy')

# --- first pass: remove frame 14 from deepmd_ttr2 ---
ls = LabeledSystem('deepmd_ttr2', fmt='deepmd/npy')
idx = list(range(len(ls)))
idx.remove(14)
ls2 = ls.sub_system(idx)
ls2.to_deepmd_npy('test')
fparam = np.load('deepmd_ttr2/set.001/fparam.npy')
np.save('test/set.000/fparam.npy', fparam[idx])

# --- second pass: work on a copy with set.000 removed ---
# Originally the shell commands:
#   cp -r deepmd_ttr2 deepmd_ttr2_tmp
#   rm -r deepmd_ttr2_tmp/set.000
# Unlike `cp -r`, copytree raises if deepmd_ttr2_tmp already exists.
shutil.copytree('deepmd_ttr2', 'deepmd_ttr2_tmp')
shutil.rmtree('deepmd_ttr2_tmp/set.000')

ls = LabeledSystem('deepmd_ttr2_tmp', fmt='deepmd/npy')
idx = list(range(len(ls)))
idx.remove(10)
ls2 = ls.sub_system(idx)
ls2.to_deepmd_npy('test')
# NOTE(review): set.000 was removed from deepmd_ttr2_tmp just above, so this
# path looks stale (the first pass read set.001) - confirm the intended file.
fparam = np.load('deepmd_ttr2_tmp/set.000/fparam.npy')
from glob import glob
from tqdm import tqdm

# Process multiple systems: collect every OUTCAR whose first-frame maximum
# force component does not exceed `maxf` into one MultiSystems object and
# dump it in deepmd raw and npy formats.
fs = glob('iter.0000[4-7]*/02.fp/task*/OUTCAR')
maxf = 3.0
ms = MultiSystems()
ic = 0  # number of systems accepted
vacuum_size = 13

for f in tqdm(fs):
    # Skip tasks that check_cluster flags for the matching POSCAR.
    if check_cluster(f.replace('OUTCAR', 'POSCAR'), vacuum_size, fmt='POSCAR'):
        print(f)
        continue
    try:
        ls = LabeledSystem(f)
    except Exception:
        # Unreadable OUTCAR: report and move on.  Narrowed from a bare
        # except so Ctrl-C still interrupts the loop.
        print(f)
        continue
    if len(ls) == 0:
        continue
    if ls.sub_system([0]).data['forces'].max() > maxf:
        continue
    ic += 1
    ms.append(ls)

print(len(fs))
print(ic)
ms.to_deepmd_raw('deepmd-f%s' % maxf)
ms.to_deepmd_npy('deepmd-f%s' % maxf)
from glob import glob
from dpdata import LabeledSystem
from monty.serialization import dumpfn, loadfn
from tqdm import tqdm

# Convert the last frame of every matching OUTCAR into a pymatgen
# ComputedStructureEntry and dump all of them to one JSON file.
fs = glob('usefull-[1-3]/sys-*/OUTCAR')
entries = []
for f in tqdm(fs):
    ls = LabeledSystem(f)
    # The conversion used to run twice with the first result discarded;
    # it is now done once.
    entry = ls.sub_system([-1]).to_pymatgen_ComputedStructureEntry()[0]
    entries.append(entry)
dumpfn(entries, 'all-vasp-entries.json')