def build_deepmd(path, nsw):
    """Convert ``<path>/outcar`` into DeePMD npy sets under ``<path>/deepmd``.

    Args:
        path: Directory that holds the input file and receives the ``deepmd``
            output directory.
        nsw: Step count used to choose the set size (presumably the number of
            ionic steps of the run -- confirm with the caller).
    """
    # NOTE(review): the file name is lower-case 'outcar' here; confirm this is
    # intended on case-sensitive file systems.
    source_file = os.path.join(path, 'outcar')
    system = LabeledSystem(source_file, fmt='outcar')
    target_dir = os.path.join(path, 'deepmd')

    if nsw > 4:
        # A quarter of the steps per set.  With e.g. 82 steps the sets hold
        # 20 frames each and the trailing set only 2.
        frames_per_set = nsw // 4
    else:
        # Runs are expected to be much longer (> 100 steps), so report this.
        frames_per_set = 1
        print("{0} has only {1}".format(path, nsw))

    system.to_deepmd_npy(target_dir, set_size=frames_per_set)
def build_deepmd(path, nsw):
    """Convert ``<path>/OUTCAR`` into DeePMD npy sets under ``<path>/deepmd``.

    The set size is half the step count for runs of up to 2000 steps and 1000
    for longer runs.  For runs longer than 3000 steps ``check_sets`` is called
    on the output directory afterwards.

    Args:
        path: Directory that holds ``OUTCAR`` and receives the ``deepmd``
            output directory.
        nsw: Step count used to choose the set size (presumably the number of
            ionic steps of the run -- confirm with the caller).
    """
    ls = LabeledSystem(os.path.join(path, 'OUTCAR'), fmt='outcar')
    deepmd = os.path.join(path, 'deepmd')

    if nsw > 2000:
        set_size = 1000
    else:
        # Runs are expected to have more than 100 steps, but never hand a set
        # size of 0 to the writer: nsw // 2 is 0 for nsw of 0 or 1.
        set_size = max(1, nsw // 2)

    ls.to_deepmd_npy(deepmd, set_size=set_size)
    if nsw > 3000:
        check_sets(deepmd)
def build_deepmd(path, nsw, outcar, deepmd):
    """Convert selected frames of an OUTCAR-format file into DeePMD npy sets.

    Frame selection is driven by the module-level ``args`` namespace:

    * ``args.idx`` -- text file of frame indices, used directly as indices
      into the loaded system.
    * ``args.vaspidx`` -- text file of step numbers.  If the second line of
      ``outcar`` contains ``nsw_sel`` (a file written by merge_out.py), a
      frame is kept when its listed step number appears in the file;
      otherwise the step numbers are treated as 1-based and shifted to
      0-based indices.  When both files are given this one wins.
    * neither -- every frame is kept.

    Other ``args`` fields read: ``batchsize`` (explicit set size) and ``test``
    (also store the unselected frames as ``set.001`` of the output).

    Args:
        path: Directory the output directory is created under.
        nsw: Step count used to choose the set size when ``args.batchsize``
            is not given.
        outcar: Path of the OUTCAR-format input file.
        deepmd: Name of the output directory, joined onto ``path``.
    """
    ls = LabeledSystem(outcar, fmt='outcar')
    nframes = len(ls)

    idx = None
    if args.idx:
        print("index file provided")
        # ndmin=1 keeps a single-entry file usable as an index array.
        idx = np.loadtxt(args.idx, ndmin=1).astype(int)
    if args.vaspidx:
        print("vasp index file provided")
        # loadtxt yields floats; cast so the values can be used as indices.
        vaspidx = np.loadtxt(args.vaspidx, ndmin=1).astype(int)
        with open(outcar) as fp:
            fp.readline()
            second_line = fp.readline()
        if 'nsw_sel' in second_line:
            print('file generated by merge_out.py')
            wanted_steps = set(vaspidx.ravel().tolist())
            listed_steps = [int(tok) for tok in second_line.split('=')[1].split()]
            idx = [i for i, step in enumerate(listed_steps)
                   if step in wanted_steps]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    if idx is None:
        # No index file at all: keep every frame instead of failing on an
        # unbound name.
        idx = np.arange(nframes)

    # The complement of the selection becomes the test system.
    selected = set(np.asarray(idx).ravel().tolist())
    idx2 = [i for i in range(nframes) if i not in selected]
    ls2 = ls.sub_system(idx2)
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)
    if args.batchsize:
        set_size = args.batchsize
    elif nsw > 4:
        set_size, _ = best_size(nsw)
    else:
        # Runs are expected to have more than 100 steps, so report this.
        set_size = 1
        print("{0} has only {1}".format(path, nsw))
    ls.to_deepmd_npy(deepmd, set_size=set_size)

    if args.test:
        if len(ls2) == 0:
            print('test set has no data')
        else:
            # NOTE(review): copytree raises if <deepmd>/set.001 already
            # exists, which presumably happens when the training frames were
            # split into more than one set -- confirm.
            try:
                ls2.to_deepmd_npy('test_tmp', set_size=100000)
                shutil.copytree('test_tmp/set.000',
                                os.path.join(deepmd, 'set.001'))
            finally:
                # Never leave the scratch directory behind.
                shutil.rmtree('test_tmp', ignore_errors=True)
def build_deepmd_frames(path, outcar, deepmd):
    """Filter, split and convert the frames of ``outcar`` to DeePMD npy sets.

    Steps, all driven by the module-level ``args`` namespace:

    1. Load ``outcar`` with format ``args.format`` as a ``LabeledSystem``,
       falling back to a plain ``System`` if that fails.
    2. Drop the frame indices listed in ``args.exclude``.
    3. If ``args.force_limit`` is set, keep only frames whose force components
       all lie between its minimum and maximum.
    4. Pick the training frames: from the index file ``args.idx``; else from
       the step-number file ``args.vaspidx`` (matched against the ``nsw_sel``
       list on the second line of a merge_out.py file, or treated as 1-based
       step numbers otherwise); else by a random split with ratio
       ``args.train_test_ratio`` : 1.
    5. Write the training frames to ``<path>/<deepmd>`` and, if
       ``args.savetest`` is set and test frames exist, store them as
       ``set.001`` of the same directory.

    Args:
        path: Directory the output directory is created under.
        outcar: Path of the input file.
        deepmd: Name of the output directory, joined onto ``path``.
    """
    try:
        ls = LabeledSystem(outcar, fmt=args.format)
    except Exception:
        # Not readable as a labeled system; load it without labels.  Catching
        # Exception (not everything) lets Ctrl-C and SystemExit through.
        ls = System(outcar, fmt=args.format)

    if args.exclude:
        oldsize = len(ls)
        excluded = set(args.exclude)
        idx_new = [i for i in range(oldsize) if i not in excluded]
        ls = ls.sub_system(idx_new)
        newsize = len(ls)
        print('{0}/{1} is selected'.format(newsize, oldsize))

    if args.force_limit:
        fmin = min(args.force_limit)
        fmax = max(args.force_limit)
        print("force limit imposed, force in between {0}, {1}".format(
            fmin, fmax))
        idx_new = []
        exclude = []
        for i in range(len(ls)):
            forces = ls[i].data['forces']
            if forces.min() >= fmin and forces.max() <= fmax:
                idx_new.append(i)
            else:
                exclude.append(i)
        print('excluded frames', exclude)
        print('{0} / {1} is selected'.format(len(idx_new), len(ls)))
        ls = ls.sub_system(idx_new)

    nframes = len(ls)
    if args.idx:
        print("index file provided")
        # ndmin=1 keeps a single-entry file usable as an index array.
        idx = np.loadtxt(args.idx, ndmin=1).astype(int)
    elif args.vaspidx:
        print("vasp index file provided")
        # loadtxt yields floats; cast so the values can be used as indices.
        vaspidx = np.loadtxt(args.vaspidx, ndmin=1).astype(int)
        with open(outcar) as fp:
            fp.readline()
            second_line = fp.readline()
        if 'nsw_sel' in second_line:
            print('file generated by merge_out.py')
            wanted_steps = set(vaspidx.ravel().tolist())
            listed_steps = [int(tok) for tok in second_line.split('=')[1].split()]
            idx = [i for i, step in enumerate(listed_steps)
                   if step in wanted_steps]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    else:
        print("split train and test by ratio {0} : {1}".format(
            args.train_test_ratio, 1))
        train_size = round(
            nframes * (args.train_test_ratio) / (args.train_test_ratio + 1))
        idx = np.random.choice(range(nframes), train_size, replace=False)
        idx.sort()

    # Everything that was not selected for training is the test system.
    selected = set(np.asarray(idx).ravel().tolist())
    idx2 = [i for i in range(nframes) if i not in selected]
    ls2 = ls.sub_system(idx2)
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)
    # Use a set size far above the library default of 5000 frames.
    ls.to_deepmd_npy(deepmd, set_size=1000000)

    if len(ls2) == 0:
        print('test set has no data')
    elif args.savetest:
        try:
            ls2.to_deepmd_npy('test_tmp', set_size=1000000)
            shutil.copytree('test_tmp/set.000',
                            os.path.join(deepmd, 'set.001'))
        finally:
            # Never leave the scratch directory behind.
            shutil.rmtree('test_tmp', ignore_errors=True)
from dpdata import System, LabeledSystem, MultiSystems
import os

# Read the list of folders to process.  Blank lines and any line containing
# '#' are skipped; the newline is stripped from the rest.
with open('folders_to_merge', 'r') as listing:
    raw_lines = listing.readlines()

folders = [
    line.replace('\n', '')
    for line in raw_lines
    if not ('#' in line or line == '\n')
]

# Convert the OUTCAR of each folder to DeePMD raw and npy data, working from
# inside the folder and returning to the starting directory afterwards.
for path in folders:
    origin = os.getcwd()
    os.chdir(path)
    print("process ", path)
    ls = LabeledSystem('OUTCAR', fmt='outcar')
    ls.to_deepmd_raw('deepmd')
    ls.to_deepmd_npy('deepmd', set_size=1000)
    os.chdir(origin)
    print("done ", path)

print("done")