def parse(dir_def,xlfile,soc=False,logfile=None):

    """
    Parse correction calculated by sxdefectalign2d and add to pandas dataframe.

    Every sheet of the excel file is loaded as one dataframe per charge state.
    The charged sheets get an E_corr column (initialised to NaN) which is filled
    from the line starting with 'iso - periodic energy' found in
    <charge>/<supercell>/<vacuum>/[dos/][restart/]correction/correction.
    All sheets are then written back to the same excel file.

    dir_def (str): path to the defect directory (also where the excel file is)
    xlfile (str): excel filename to read/save the dataframe from/to
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
    [optional] logfile (str): logfile to save output to
    """

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    xlpath = os.path.join(dir_def,xlfile)

    ## load list of dataframes from sheets from excel file
    df = pd.read_excel(xlpath,sheet_name=None)

    ## the neutral defect needs no correction, so only charged sheets get the column
    for q in df:
        if q != 'charge_0':
            df[q]['E_corr'] = np.nan

    for q in osutils.listdironly(os.path.join(dir_def,'')):
        if q == 'charge_0':
            continue
        if q not in df:
            ## a directory without a matching sheet cannot be updated
            myLogger.warning("no sheet named %s in %s; skipping"%(q,xlfile))
            continue
        for cell in osutils.listdironly(os.path.join(dir_def,q,'')):
            for vac in osutils.listdironly(os.path.join(dir_def,q,cell,'')):
                myLogger.info("parsing %s %s %s"%(q,cell,vac))
                folder = os.path.join(dir_def,q,cell,vac,'')
                if soc:
                    myLogger.info("parsing dos subdirectory")
                    folder = os.path.join(folder,'dos','')
                if os.path.exists(folder) and 'restart' in osutils.listdironly(folder):
                    folder = os.path.join(folder,'restart','')
                    myLogger.info("parsing restart subdirectory")

                corr_file = os.path.join(folder,'correction','correction')
                if not os.path.exists(corr_file):
                    myLogger.warning("correction file does not exist")
                    continue

                ## keep the last matching line; None means the marker was never found
                E_corr = None
                with open(corr_file) as f:
                    for line in f:
                        if line[:21] == 'iso - periodic energy':
                            E_corr = float(line.split()[-2])
                if E_corr is None:
                    myLogger.warning("no 'iso - periodic energy' line in %s"%corr_file)
                    continue

                rows = (df[q]['vacuum'] == vac) & (df[q]['supercell'] == cell)
                df[q].loc[rows,'E_corr'] = E_corr

    ## write the updated excel file
    ## (context manager saves and closes the file; ExcelWriter.save() no longer exists in pandas 2)
    with pd.ExcelWriter(xlpath) as writer:
        for q in df:
            df[q].to_excel(writer,sheet_name=q,index=False)
def main(args):
    """
    Command-line entry point: create or modify a simple database entry.

    args (list of str): command-line arguments (without the program name),
        so that this function can also be called from another python script.

    Depending on the "method" argument, a DatabaseEntry is built and either
    create_entry_from_vasp or create_entry_from_formula is called on it.
    Missing required arguments or an unknown method are reported through the
    logger; nothing is created in that case.
    """

    ## define a main function callable from another python script

    parser = argparse.ArgumentParser(
        description='Create or modify simple database entries')
    parser.add_argument('main_system',
                        help='the main system e.g. MoS2, WSe2')
    parser.add_argument('dir_db',
                        help='path to the database directory')
    parser.add_argument('method',
                        help='how to construct database entry '
                             '("from_vasp" or "from_formula")')
    parser.add_argument('--dir_dft',
                        help='required if method is "from_vasp": '
                             'path to main directory containing DFT calcs for this system')
    parser.add_argument('--mu_limit',
                        help='required if method is "from_formula": '
                             'which chemical potential limit to consider, e.g. Mo-rich')
    parser.add_argument('--formula',
                        help='required if method is "from_formula": '
                             'formula to use to calculate chemical potential, '
                             'e.g. 0.5*MoS2-0.5*Mo')
    parser.add_argument('--monolayer',
                        help='is this a monolayer system with vacuum dependence?',
                        default=False, action='store_true')
    parser.add_argument('--funcs', nargs='+',
                        help='list each functional separated by a space, '
                             'use same naming convention as subdirectories in dir_dft',
                        default=["GGA", "SCAN_vdW"])
    parser.add_argument('--eps_slab', nargs='+',
                        help='eps_slab calculated manually for the slab. '
                             'List each dielectric constant separated by a space, '
                             'in the order corresponding to funcs and d_slab.')
    parser.add_argument('--d_slab', nargs='+',
                        help='d_slab calculated manually for the slab. '
                             'List each slab thickness separated by a space, '
                             'in the order corresponding to funcs and eps_slab.')
    parser.add_argument('--dbentry',
                        help='existing database entry to append to')
    parser.add_argument('--logfile',
                        help='logfile to save output to')

    ## read in the above arguments from command line
    args = parser.parse_args(args)

    ## set up logging
    if args.logfile:
        myLogger = logging.setup_logging(args.logfile)
    else:
        myLogger = logging.setup_logging()

    if args.method == "from_vasp":
        if not args.dir_dft:
            myLogger.info(
                'For method "from_vasp", dir_dft is a required argument')
        else:
            db = DatabaseEntry(args.main_system, args.monolayer,
                               args.dir_dft, args.dir_db,
                               args.dbentry, args.funcs, myLogger)
            db.create_entry_from_vasp(eps_slab=args.eps_slab,
                                      d_slab=args.d_slab)

    elif args.method == "from_formula":
        if not args.mu_limit or not args.formula:
            myLogger.info(
                'For method "from_formula", mu_limit and formula are required arguments'
            )
        else:
            ## the "required arguments" message belongs to the failure branch only;
            ## the logger is handed over exactly as in the "from_vasp" branch
            db = DatabaseEntry(args.main_system, args.monolayer,
                               args.dir_dft, args.dir_db,
                               args.dbentry, args.funcs, myLogger)
            db.create_entry_from_formula(mu_limit=args.mu_limit,
                                         formula=args.formula)

    else:
        myLogger.info('Invalid method!')
def _run_sxdefectalign2d(command, outfile=None, append=False):
    """
    Run sxdefectalign2d without going through a shell and return its exit code.

    command (list of str): executable followed by its arguments
    [optional] outfile (str): file that receives the program's stdout.
                              Default=None (stdout is inherited).
    [optional] append (bool): append to outfile instead of overwriting. Default=False.
    """
    import subprocess  ## local import keeps this block self-contained

    if outfile is None:
        return subprocess.call(command)
    with open(outfile, 'a' if append else 'w') as fout:
        return subprocess.call(command, stdout=fout)


def calc(vref, vdef, encut, q, threshold_slope=1e-3, threshold_C=1e-3,
         max_iter=20, vfile='vline-eV.dat', noplots=False, allplots=False,
         logfile=None):
    """
    Estimate alignment correction.

    sxdefectalign2d is run repeatedly with different --shift values.
    After every run the short-range potential (last column of vfile) is fitted
    with a straight line in the first and in the last 2 bohr of the cell.
    The shift is bracketed/bisected until both lines are flat and agree;
    sxdefectalign2d is then run once more with the averaged intercept as -C
    and its output is written to the file "correction" in the current directory.

    vref (str): path to bulk LOCPOT file
    vdef (str): path to defect LOCPOT file
    encut (int): cutoff energy (eV)
    q (int): charge (conventional units)
    [optional] threshold_slope (float): threshold for determining if potential is flat
                                        Default=1e-3.
    [optional] threshold_C (float): threshold for determining if potential is aligned
                                    Default=1e-3.
    [optional] max_iter (int): max. no. of shifts to try. Default=20.
    [optional] vfile (str): vline .dat file. Default='vline-eV.dat'
    [optional] noplots (bool): do not generate plots. Default=False.
    [optional] allplots (bool): save all plots. Default=False.
    [optional] logfile (str): logfile to save output to
    """

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(os.path.join(os.getcwd(), logfile))
    else:
        myLogger = logging.setup_logging()

    ## basic command to run sxdefectalign2d
    ## (kept as an argument list so that paths containing spaces survive;
    ##  "~" is expanded here because no shell is involved any more)
    command = [
        os.path.expanduser('~/sxdefectalign2d'), '--vasp',
        '--ecut', str(encut / 13.6057),  ## convert eV to Ry
        '--vref', vref,
        '--vdef', vdef
    ]

    ## initialize the range of shift values bracketing the optimal shift
    smin, smax = -np.inf, np.inf
    shift = 0.0
    shifting = 'right'
    done = False
    counter = -1
    time0 = time.time()

    while not done and counter < max_iter:
        counter += 1

        ## run sxdefectalign2d with --shift <shift>
        ## program output is appended to the logfile when one is given
        status = _run_sxdefectalign2d(
            command + ['--shift', str(shift), '--onlyProfile'],
            outfile=logfile, append=True)
        if status != 0:
            myLogger.warning("sxdefectalign2d exited with status %d" % status)

        ## read in the potential profiles from vline-eV.dat
        ## z  V^{model}  \DeltaV^{DFT}  V^{sr}
        data = np.loadtxt(vfile)

        ## plot potential profiles
        if not noplots:
            plt.figure()
            plt.plot(data[:, 0], data[:, 2], 'r', label=r'$V_{def}-V_{bulk}$')
            plt.plot(data[:, 0], data[:, 1], 'g', label=r'$V_{model}$')
            plt.plot(data[:, 0], data[:, -1], 'b',
                     label=r'$V_{def}-V_{bulk}-V_{model}$')
            plt.xlabel("distance along z axis (bohr)")
            plt.ylabel("potential (eV)")
            plt.xlim(data[0, 0], data[-1, 0])
            plt.legend()
            if allplots:
                figname = 'alignment_%d.png' % counter
            else:
                figname = 'alignment.png'
            plt.savefig(os.path.join(os.getcwd(), figname))
            plt.close()

        ## assumes that the slab is in the center of the cell vertically!
        ## select datapoints corresponding to 2 bohrs at the top and bottom of the supercell
        ## (i.e. a total of 4 bohrs in the middle of vacuum)
        z1 = np.min([i for i, z in enumerate(data[:, 0]) if z > 2.])
        z2 = np.min([i for i, z in enumerate(data[:, 0])
                     if z > (data[-1, 0] - 2.)])

        ## fit straight lines through each subset of datapoints
        m1, C1 = np.polyfit(data[:z1, 0], data[:z1, -1], 1)
        m2, C2 = np.polyfit(data[z2:, 0], data[z2:, -1], 1)
        myLogger.debug("Slopes: %.8f %.8f; Intercepts: %.8f %.8f"
                       % (m1, m2, C1, C2))

        ## check the slopes and intercepts of the lines
        ## and shift the charge along z until the lines are flat
        trend = (m1 + m2) * np.sign(q)
        if (abs(m1) < threshold_slope and abs(m2) < threshold_slope
                and abs(C1 - C2) < threshold_C):
            done = True
            break
        elif m1 * m2 < 0:
            myLogger.info("undetermined...make a tiny shift and try again")
            if shifting == 'right':
                shift += 0.01
            else:
                shift -= 0.01
            myLogger.info("try shift = %.8f" % shift)
        elif trend > 0:
            smin = shift
            if smax == np.inf:
                shift += 1.0
            else:
                shift = (smin + smax) / 2.0
            shifting = 'right'
            myLogger.debug("optimal shift is in [%.8f, %.8f]" % (smin, smax))
            myLogger.info("shift charge in +z direction; try shift = %.8f"
                          % shift)
        elif trend < 0:
            smax = shift
            if smin == -np.inf:
                shift -= 1.0
            else:
                shift = (smin + smax) / 2.0
            shifting = 'left'
            myLogger.debug("optimal shift is in [%.8f, %.8f]" % (smin, smax))
            myLogger.info("shift charge in -z direction; try shift = %.8f"
                          % shift)
        else:
            ## no rule applies (e.g. q == 0 or NaN slopes): the shift would never
            ## change, so repeating the identical run is pointless
            myLogger.warning("cannot decide on a shift direction; giving up")
            break

    if done:
        C_ave = (C1 + C2) / 2
        myLogger.info("DONE! shift = %.8f & alignment correction = %.8f"
                      % (shift, C_ave))
        ## run sxdefectalign2d with --shift <shift> -C <C_ave> > correction
        status = _run_sxdefectalign2d(
            command + ['--shift', str(shift), '-C', str(C_ave)],
            outfile='correction')
        if status != 0:
            myLogger.warning("sxdefectalign2d exited with status %d" % status)
    else:
        myLogger.info("Could not find optimal shift after %d tries :("
                      % max_iter)

    myLogger.debug("Total time taken (s): %.2f" % (time.time() - time0))
def apply_all(dir_def, dir_ref, eps_slab=None, d_slab=None, dbentry=None,
              functional="GGA", encut=520, soc=False, logfile=None):
    """
    Apply sxdefectalign2d correction to all charged defect calculations.

    For every <charge>/<supercell>/<vacuum> folder below dir_def (except
    charge_0) the SPHInX input file is generated and the alignment correction
    is computed inside the folder's "correction" subdirectory, using the
    LOCPOT of the defect and of the matching charge_0 reference folder.
    The working directory is changed while running and restored afterwards.

    dir_def (str): path to the main defect directory
    dir_ref (str): path to the pristine reference directory
    [optional] eps_slab (float): ave. slab dielectric constant (supply this or dbentry)
    [optional] d_slab (float): slab thickness in Angstroms (supply this or dbentry)
    [optional] dbentry (str): path to the relevant database entry .json file
                              (supply this or eps_slab and d_slab)
    [optional] functional (str): functional used for this set of calculations. Default=GGA.
    [optional] encut (int): cutoff energy (eV). Default=520.
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
    [optional] logfile (str): logfile to save output to

    Raises FileNotFoundError if dbentry, a defectproperty.json or a LOCPOT file
    is missing.
    """

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    ## if eps_slab and d_slab are directly provided
    if eps_slab and d_slab:
        myLogger.info("eps_slab: %.2f ; d_slab: %.2f" % (eps_slab, d_slab))
    elif dbentry:
        ## extract the eps_slab and d_slab from the relevant dbentry file
        if not os.path.exists(dbentry):
            ## if can't find a dbentry file
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                    dbentry)
        myLogger.info("Using slab properties from " + dbentry)
        with open(dbentry, 'r') as file:
            mater = json.loads(file.read())
        eps_slab = mater[functional]["eps_ave"]
        d_slab = mater[functional]["d_slab"]
        myLogger.info("eps_slab: %.2f ; d_slab: %.2f" % (eps_slab, d_slab))
    else:
        ## NOTE(review): execution continues with eps_slab/d_slab possibly None,
        ## exactly as before -- confirm whether this should abort instead
        myLogger.info(
            "Insufficient information provided about the slab dielectric profile"
        )

    ## os.chdir() is called below, which would silently invalidate relative
    ## paths after the first folder -> pin both roots to absolute paths first
    dir_def = os.path.abspath(dir_def)
    dir_ref = os.path.abspath(dir_ref)
    start_dir = os.getcwd()

    try:
        charged = [qi for qi in osutils.listdironly(os.path.join(dir_def, ''))
                   if qi != 'charge_0']
        for q in charged:
            for cell in osutils.listdironly(os.path.join(dir_def, q, '')):
                for vac in osutils.listdironly(
                        os.path.join(dir_def, q, cell, '')):

                    folder = os.path.join(dir_def, q, cell, vac, '')
                    folder_ref = os.path.join(dir_ref, 'charge_0', cell, vac,
                                              '')
                    if soc:
                        folder = os.path.join(folder, 'dos', '')
                        folder_ref = os.path.join(folder_ref, 'dos', '')
                    if (os.path.exists(folder)
                            and 'restart' in osutils.listdironly(folder)):
                        folder = os.path.join(folder, 'restart', '')

                    ## check if defectproperty.json file is present in current directory
                    defprop = os.path.join(folder, 'defectproperty.json')
                    if not os.path.exists(defprop):
                        raise FileNotFoundError(errno.ENOENT,
                                                os.strerror(errno.ENOENT),
                                                defprop)

                    os.chdir(folder)
                    ## generate SPHInX input file
                    SPHInX_input_file.generate(eps_slab, d_slab)

                    ## both potentials are needed; report the defect one first
                    locpot = os.path.join(folder, 'LOCPOT')
                    locpot_ref = os.path.join(folder_ref, 'LOCPOT')
                    for needed in (locpot, locpot_ref):
                        if not os.path.exists(needed):
                            raise FileNotFoundError(errno.ENOENT,
                                                    os.strerror(errno.ENOENT),
                                                    needed)

                    os.chdir(os.path.join(folder, 'correction'))
                    myLogger.info(
                        "applying correction to calculations in %s" % folder)
                    ## apply correction
                    alignment_correction_2d.calc(locpot_ref, locpot,
                                                 encut,
                                                 int(q.split('_')[-1]),
                                                 allplots=True,
                                                 logfile='getalign.log')
    finally:
        ## leave the process where the caller started it, also on errors
        os.chdir(start_dir)
def setup(dir_def_main,qs,cells,vacs,functional='PBE',kppa=400,bulkref=False):

    """
    Generate input files for defect calculations.

    Creates the directory tree charge_<q>/<n1>x<n2>x<n3>/vac_<vac> below
    dir_def_main and, inside every leaf directory, copies the POTCAR and
    generates POSCAR, INCAR, KPOINTS and a submission script.
    The working directory is dir_def_main after each leaf has been processed.

    Parameters
    ----------
    dir_def_main (str): path to the main defect directory
    qs (list of ints): list of charge states
    cells (list of tuples of ints): list of [n1,n2,n3] supercell sizes
    vacs (list of ints): list of vacuum spacings
    [optional] functional (str): type of function: PBE(default)/SCAN+rVV10
    [optional] kppa (int): kpoint density per reciprocal atom. Default=400 pra.
    [optional] bulkref (bool): write files for reference calculations? Default=False.

    Raises
    ------
    FileNotFoundError if the initdef file (only needed when bulkref is False),
    the POTCAR or one of the POSCAR_vac_<vac> files is missing in dir_def_main.
    """

    ## set up logging
    myLogger = logging.setup_logging()

    ## os.chdir() is used below; a relative dir_def_main would point somewhere
    ## else after the first chdir, so resolve it once up front
    dir_def_main = os.path.abspath(dir_def_main)

    ## check if initdef file is present in dir_def_main ?
    file_initdef = None
    if not bulkref:
        if osutils.check_file_exists(dir_def_main,"initdef"):
            for fname in os.listdir(dir_def_main):
                if fname.startswith("initdef"):
                    file_initdef = fname
        if file_initdef is None:
            raise FileNotFoundError(errno.ENOENT,
                                    os.strerror(errno.ENOENT),
                                    os.path.join(dir_def_main,"initdefect.json"))

    ## check if POTCAR is present in dir_def_main ?
    pot_file = os.path.join(dir_def_main,"POTCAR")
    if not os.path.exists(pot_file):
        raise FileNotFoundError(errno.ENOENT,
                                os.strerror(errno.ENOENT),pot_file)

    ## check if relevant POSCARs are present in dir_def_main ?
    for vac in vacs:
        pos_file = os.path.join(dir_def_main,"POSCAR_vac_%d"%vac)
        if not os.path.exists(pos_file):
            raise FileNotFoundError(errno.ENOENT,
                                    os.strerror(errno.ENOENT),pos_file)

    for q in qs:
        ## create and enter charge subdirectory
        dir_q = os.path.join(dir_def_main,"charge_%d"%q)
        if not os.path.exists(dir_q):
            os.makedirs(dir_q)
        os.chdir(dir_q)

        for cell in cells:
            ## create and enter supercell subdirectory
            cell_str = "%sx%sx%s"%(cell[0],cell[1],cell[2])
            dir_cell = os.path.join(dir_q,cell_str)
            if not os.path.exists(dir_cell):
                os.makedirs(dir_cell)
            os.chdir(dir_cell)

            for vac in vacs:
                ## create and enter vacuum subdirectory
                dir_vac = os.path.join(dir_cell,"vac_%d"%vac)
                if not os.path.exists(dir_vac):
                    os.makedirs(dir_vac)
                os.chdir(dir_vac)

                myLogger.info("generating input files in "+os.getcwd())

                ## cp POTCAR from dir_main
                shutil.copyfile(pot_file,
                                os.path.join(os.getcwd(),"POTCAR"))

                ## generate defect POSCAR
                if bulkref:
                    gen_defect_supercell.generate(dir_def_main,'dummy.json',
                                                  q,cell,vac,
                                                  bulkref=True)
                else:
                    gen_defect_supercell.generate(dir_def_main,file_initdef,
                                                  q,cell,vac)

                ## generate INCAR
                incar.generate(q=q,functional=functional)

                ## generate KPOINTS
                kpoints.generate_uniform(kppa=kppa)

                ## generate submission script
                ## with the default settings nodes=1, cpus=32, mem=2048, time=24:00:00
                ## PLEASE CHANGE EMAIL SETTINGS!!!
                if bulkref:
                    submit.generate(jobname='ref_%s_%d'%(cell_str,vac),
                                    email='*****@*****.**')
                else:
                    submit.generate(email='*****@*****.**')

                os.chdir(dir_def_main)
def calc(main_system, dir_db, dir_def, xlfile, mu_limit, functional="GGA",
         soc=False, logfile=None):
    """
    Evaluate uncorrected defect formation energy.

    For every sheet (charge state) of the excel file the chemical potentials
    of the added/removed species and the VBM of the main system are looked up
    in the database and stored as columns; then
    Eform = E_def - E_bulk - sum(n_i*mu_i) + q*VBM
    is stored as "E_form_corr" (charge_0) or "E_form_uncorr" (charged sheets).
    All sheets are written back to the same excel file.

    Parameters
    ----------
    main_system (str): the main system e.g. MoS2, WSe2
    dir_db (str): path to the database directory
    dir_def (str): path to the defect directory containing the excel, initdefect.json files
    xlfile (str): excel filename to read/save the dataframe from/to
    mu_limit (str): which chemical potential limit to consider, e.g. Mo-rich
    [optional] functional (str): functional used for this set of calculations. Default=GGA.
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
                           (not used by this function)
    [optional] logfile (str): logfile to save output to

    Raises
    ------
    FileNotFoundError if no file starting with "initdef" exists in dir_def.
    """
    import errno  ## local import: only needed for the error raised below

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    xlpath = os.path.join(dir_def, xlfile)

    ## load list of dataframes from sheets from excel file
    df = pd.read_excel(xlpath, sheet_name=None)

    ## find initdef.json file
    file_initdef = None
    if osutils.check_file_exists(dir_def, "initdef"):
        for fname in os.listdir(dir_def):
            if fname.startswith("initdef"):
                file_initdef = fname
    if file_initdef is None:
        ## fail with a clear error rather than a NameError further down
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                os.path.join(dir_def, "initdefect.json"))

    ## get species i and ni from initdefect.json file
    with open(os.path.join(dir_def, file_initdef), 'r') as file:
        initdef = json.loads(file.read())
    species_list, ni_list = [], []
    for defect in initdef:
        species, ni = get_i_ni(initdef[defect])
        species_list += species
        ni_list += ni
    myLogger.info("Atoms added/removed: " + \
                  ", ".join([str(n)+"*"+i for n,i in zip(ni_list,species_list)]))

    for q in df:

        ## get the relevant chemical potentials
        found_mu = True
        for species in species_list:
            mu = "mu_%s_%s" % (species, mu_limit)

            ## check if the relevant database entry exists
            dbentry_file = "%s.json" % species
            if not osutils.check_file_exists(dir_db, dbentry_file):
                myLogger.info("Cannot find the database entry for " + species)
                found_mu = False
                continue

            with open(os.path.join(dir_db, dbentry_file), 'r') as file:
                mater = json.loads(file.read())
            ## search for appropriate mu entry
            ## (falls back to the plain "mu" key if no limit-specific key exists)
            mu_key = "mu"
            for key in mater[functional]:
                if key.startswith("mu_%s" % mu_limit):
                    mu_key = key
            myLogger.info("Using chemical potential " + mu_key + " from " +
                          dbentry_file)
            ## input the corresponding mus into the dataframe
            df[q][mu] = mater[functional][mu_key]

        ## get the VBMs
        ## check if the relevant database entry exists
        dbentry_file = "%s.json" % (main_system)
        if not osutils.check_file_exists(dir_db, dbentry_file):
            myLogger.info("Cannot find the database entry for " + main_system)
            continue

        with open(os.path.join(dir_db, dbentry_file), 'r') as file:
            mater = json.loads(file.read())

        ## input the VBMs corresponding to each vacuum spacing into the dataframe
        for rowind in df[q].index.values:
            vac = df[q].loc[rowind].vacuum
            if vac in mater[functional]:
                df[q].at[rowind, 'VBM'] = mater[functional][vac]["VBM"]
            else:
                ## %-formatting also copes with a non-string vacuum label
                myLogger.info("Cannot find the VBM entry for %s" % vac)

        ## Finally, we can compute the uncorrected defect formation energy:
        ## Eform = Etot(def) - Etot(pristine) - sum(n_i*mu_i) + q*E_Fermi
        if found_mu:
            ## proceed if chemical potentials and VBMs have been correctly entered
            sum_mu = 0
            for n, species in zip(ni_list, species_list):
                mu = "mu_%s_%s" % (species, mu_limit)
                sum_mu += n * df[q][mu]
            ## the neutral defect needs no further correction
            if q == 'charge_0':
                colname = "E_form_corr"
            else:
                colname = "E_form_uncorr"
            charge = int(q.split("_")[-1])
            df[q][colname] = (df[q].loc[:, 'E_def']
                              - df[q].loc[:, 'E_bulk']
                              - sum_mu
                              + charge * df[q].loc[:, 'VBM'])

    ## write the updated excel file
    ## (context manager saves and closes the file; ExcelWriter.save() no longer exists in pandas 2)
    with pd.ExcelWriter(xlpath) as writer:
        for q in df:
            df[q].to_excel(writer, sheet_name=q, index=False)
def _locate_output_dir(subdir, soc, myLogger):
    """
    Descend from a <charge>/<supercell>/<vacuum> folder to the folder holding the output.

    subdir (str): path to the <charge>/<supercell>/<vacuum> folder
    soc (bool): whether or not to look for a subdirectory with "soc" in its name
    myLogger: logger used for progress messages

    Returns (path, soc_name): the folder to parse and the name of the soc
    subdirectory that was entered (None if none was entered).
    """
    soc_name = None
    if soc:
        dir_soc = [dirname for dirname in osutils.listdironly(subdir)
                   if "soc" in dirname]
        if len(dir_soc) == 0:
            myLogger.info("cannot find a soc subdirectory")
        elif len(dir_soc) > 1:
            myLogger.info("multiple possible soc subdirectories found")
        else:
            soc_name = dir_soc[0]
            subdir = os.path.join(subdir, soc_name)
            myLogger.info("parsing soc subdirectory")

    if os.path.exists(subdir) and 'restart' in osutils.listdironly(subdir):
        subdir = os.path.join(subdir, 'restart')
        myLogger.info("parsing restart subdirectory")

    return subdir, soc_name


def parse(path_def, path_ref, xlfile, soc=False, logfile=None):
    """
    Parse total energies from OUTCARs and save into a pandas dataframe.

    The energies are read from vasprun.xml. One sheet per charge state is
    written to the excel file. The charge_0 sheet lists vacuum, supercell,
    number of atoms N, 1/N, the defect energy E_def and the reference energy
    E_bulk. Every charged sheet starts as a copy of the charge_0 rows with
    E_def replaced by the charged defect energy; rows without a charged
    calculation keep E_def empty (NaN).

    Parameters
    ----------
    path_def (str): path to the directory containing all the defect output files
    path_ref (str): path to the directory containing all the reference output files
    xlfile (str): excel filename to save the dataframe to
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
    [optional] logfile (str): logfile to save output to
    """

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    time0 = time.time()
    qs = osutils.listdironly(path_def)

    ## (sheet name, dataframe) pairs in the order they will be written
    sheets = []

    ## set up dataframe for neutral defect first
    ## (it always exists, because the charged sheets are built from its rows)
    df0 = pd.DataFrame(
        columns=['vacuum', 'supercell', 'N', '1/N', 'E_def', 'E_bulk'])

    if 'charge_0' not in qs:
        myLogger.warning("can't find output files for neutral defect")
    else:
        for cell in osutils.listdironly(os.path.join(path_def, 'charge_0')):
            for vac in osutils.listdironly(
                    os.path.join(path_def, 'charge_0', cell)):
                myLogger.info("parsing neutral %s %s" % (cell, vac))

                subdir_def, soc_name = _locate_output_dir(
                    os.path.join(path_def, 'charge_0', cell, vac),
                    soc, myLogger)
                subdir_ref = os.path.join(path_ref, 'charge_0', cell, vac)
                if soc_name is not None:
                    ## the reference follows the defect into the soc folder
                    subdir_ref = os.path.join(subdir_ref, soc_name)

                vr_file = os.path.join(subdir_def, 'vasprun.xml')
                vr_ref_file = os.path.join(subdir_ref, 'vasprun.xml')

                if not os.path.exists(vr_file):
                    myLogger.warning("%s file does not exist" % vr_file)
                    continue
                if not os.path.exists(vr_ref_file):
                    myLogger.warning("%s file does not exist" % vr_ref_file)
                    continue

                natoms = np.sum(
                    Poscar.from_file(os.path.join(subdir_ref,
                                                  'POSCAR')).natoms)
                vr = Vasprun(vr_file)
                vr_ref = Vasprun(vr_ref_file)
                if not vr.converged:
                    myLogger.warning(
                        "VASP calculation in %s may not be converged" %
                        subdir_def)

                df0.loc[len(df0)] = [
                    vac, cell, natoms, 1 / natoms,
                    vr.final_energy, vr_ref.final_energy
                ]

        df0.sort_values(['vacuum', 'N'], inplace=True)
        sheets.append(('charge_0', df0))

    ## modify dataframe for charged defects
    for q in [qi for qi in qs if qi != 'charge_0']:
        df = df0.copy(deep=True)
        ## do not let the neutral energy pass for a charged one
        ## when the charged calculation is missing
        df['E_def'] = np.nan

        for cell in osutils.listdironly(os.path.join(path_def, q)):
            for vac in osutils.listdironly(os.path.join(path_def, q, cell)):
                myLogger.info("parsing %s %s %s" % (q, cell, vac))

                subdir_def, _ = _locate_output_dir(
                    os.path.join(path_def, q, cell, vac), soc, myLogger)

                vr_file = os.path.join(subdir_def, 'vasprun.xml')
                if not os.path.exists(vr_file):
                    myLogger.warning("%s file does not exist" % vr_file)
                    continue

                vr = Vasprun(vr_file)
                if not vr.converged:
                    myLogger.warning(
                        "VASP calculation in %s may not be converged" %
                        subdir_def)

                rows = (df['vacuum'] == vac) & (df['supercell'] == cell)
                df.loc[rows, 'E_def'] = vr.final_energy

        sheets.append((q, df))

    ## write all sheets at the end
    ## (context manager saves and closes the file; ExcelWriter.save() no longer exists in pandas 2)
    if sheets:
        with pd.ExcelWriter(os.path.join(path_def, xlfile)) as writer:
            for name, frame in sheets:
                frame.to_excel(writer, sheet_name=name, index=False)
    else:
        myLogger.warning("no charge directories found; %s not written" %
                         xlfile)

    myLogger.debug("Total time taken (s): %.2f" % (time.time() - time0))