def parse(dir_def,xlfile,soc=False,logfile=None):
    
    """
    Parse correction calculated by sxdefectalign2d and add to pandas dataframe.
    
    Walks dir_def/<charge>/<supercell>/<vacuum>/ for every charge directory
    other than 'charge_0', reads the 'correction/correction' file found there
    and stores the parsed value in the 'E_corr' column of the matching row
    ('vacuum' == <vacuum> and 'supercell' == <supercell>) of the sheet named
    after the charge directory. The excel file is rewritten in place.
    
    dir_def (str): path to the defect directory (also where the excel file is)
    xlfile (str): excel filename to read/save the dataframe from/to
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
    [optional] logfile (str): logfile to save output to
    
    """
    
    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    xlpath = os.path.join(dir_def,xlfile)

    ## load dict of dataframes (one per sheet) from excel file
    df = pd.read_excel(xlpath,sheet_name=None)

    ## start every charged sheet from a clean E_corr column
    for q in df:
        if q != 'charge_0':
            df[q]['E_corr'] = np.nan

    for q in osutils.listdironly(os.path.join(dir_def,'')):
        if q == 'charge_0':
            continue
        if q not in df:
            ## a directory without a matching sheet used to raise KeyError
            myLogger.warning("no sheet named %s in %s; skipping"%(q,xlfile))
            continue

        for cell in osutils.listdironly(os.path.join(dir_def,q,'')):
            for vac in osutils.listdironly(os.path.join(dir_def,q,cell,'')):
                myLogger.info("parsing %s %s %s"%(q,cell,vac))

                folder = os.path.join(dir_def,q,cell,vac,'')

                if soc:
                    myLogger.info("parsing dos subdirectory")
                    folder = os.path.join(folder,'dos','')
                if os.path.exists(folder) and 'restart' in osutils.listdironly(folder):
                    folder = os.path.join(folder,'restart','')
                    myLogger.info("parsing restart subdirectory")

                corr_file = os.path.join(folder,'correction','correction')
                if not os.path.exists(corr_file):
                    myLogger.warning("correction file does not exist")
                    continue

                ## reset for every file so that a value parsed from a previous
                ## folder can never leak into this one
                E_corr = None
                with open(corr_file) as f:
                    for line in f:
                        if line.startswith('iso - periodic energy'):
                            ## second-to-last token on the line is the value
                            ## (last match in the file wins)
                            E_corr = float(line.split()[-2])

                if E_corr is None:
                    myLogger.warning("no 'iso - periodic energy' line found in %s"%corr_file)
                    continue

                df[q].loc[(df[q]['vacuum'] == vac) &
                          (df[q]['supercell'] == cell),'E_corr'] = E_corr


    ## write the updated excel file
    ## (context manager saves and closes; ExcelWriter.save() no longer exists
    ## in pandas >= 2.0)
    with pd.ExcelWriter(xlpath) as writer:
        for q in df:
            df[q].to_excel(writer, sheet_name=q, index=False)
def main(args):
    """
    Create or modify a database entry from a list of command-line style arguments.

    Defined as a function (rather than only under __main__) so that it can be
    called from another python script.

    args (list of str): argument strings, parsed with argparse exactly as if
                        they had been given on the command line
                        (positional: main_system, dir_db, method).

    Nothing is returned. Missing method-specific arguments and an unknown
    method are reported through the logger; no exception is raised for them.
    """

    parser = argparse.ArgumentParser(
        description='Create or modify simple database entries')
    parser.add_argument('main_system', help='the main system e.g. MoS2, WSe2')
    parser.add_argument('dir_db', help='path to the database directory')
    parser.add_argument('method',
                        help='how to construct database entry \
                        ("from_vasp" or "from_formula")')

    parser.add_argument('--dir_dft',
                        help='required if method is "from_vasp": \
                        path to main directory containing DFT calcs for this system'
                        )

    parser.add_argument('--mu_limit',
                        help='required if method is "from_formula": \
                        which chemical potential limit to consider, e.g. Mo-rich'
                        )
    parser.add_argument('--formula',
                        help='required if method is "from_formula": \
                        formula to use to calculate chemical potential, e.g. 0.5*MoS2-0.5*Mo'
                        )

    parser.add_argument(
        '--monolayer',
        help='is this a monolayer system with vacuum dependence?',
        default=False,
        action='store_true')
    parser.add_argument('--funcs',
                        nargs='+',
                        help='list each functional separated by a space, \
                        use same naming convention as subdirectories in dir_dft',
                        default=["GGA", "SCAN_vdW"])
    parser.add_argument('--eps_slab',
                        nargs='+',
                        help='eps_slab calculated manually for the slab. \
                        List each dielectric constant separated by a space, \
                        in the order corresponding to funcs and d_slab.')
    parser.add_argument('--d_slab',
                        nargs='+',
                        help='d_slab calculated manually for the slab. \
                        List each slab thickness separated by a space, \
                        in the order corresponding to funcs and eps_slab.')
    parser.add_argument('--dbentry',
                        help='existing database entry to append to')
    parser.add_argument('--logfile', help='logfile to save output to')

    ## read in the above arguments from the supplied list
    args = parser.parse_args(args)

    ## set up logging
    if args.logfile:
        myLogger = logging.setup_logging(args.logfile)
    else:
        myLogger = logging.setup_logging()

    if args.method == "from_vasp":
        if not args.dir_dft:
            myLogger.info(
                'For method "from_vasp", dir_dft is a required argument')
        else:
            db = DatabaseEntry(args.main_system, args.monolayer, args.dir_dft,
                               args.dir_db, args.dbentry, args.funcs, myLogger)
            db.create_entry_from_vasp(eps_slab=args.eps_slab,
                                      d_slab=args.d_slab)

    elif args.method == "from_formula":
        if not args.mu_limit or not args.formula:
            myLogger.info(
                'For method "from_formula", mu_limit and formula are required arguments'
            )
        else:
            ## the "required arguments" message used to be logged here as well,
            ## even though both arguments were supplied
            ## pass the logger exactly as the "from_vasp" branch does so that
            ## --logfile is honoured for this method too
            db = DatabaseEntry(args.main_system, args.monolayer, args.dir_dft,
                               args.dir_db, args.dbentry, args.funcs, myLogger)
            db.create_entry_from_formula(mu_limit=args.mu_limit,
                                         formula=args.formula)

    else:
        myLogger.info('Invalid method!')
def calc(vref,
         vdef,
         encut,
         q,
         threshold_slope=1e-3,
         threshold_C=1e-3,
         max_iter=20,
         vfile='vline-eV.dat',
         noplots=False,
         allplots=False,
         logfile=None):
    """
    Estimate alignment correction.

    Repeatedly runs ~/sxdefectalign2d with different --shift values, fits
    straight lines to the last column of the resulting profile file near both
    ends of the cell, and brackets/bisects the shift until both fitted lines
    are flat and their intercepts agree. On success the program is run once
    more with the final shift and alignment constant, and its output is written
    to a file named 'correction' in the current working directory.
    All files (vfile, plots, 'correction', logfile) are read/written relative
    to the current working directory.
    
    vref (str): path to bulk LOCPOT file
    vdef (str): path to defect LOCPOT file
    encut (int): cutoff energy (eV)
    q (int): charge (conventional units)
    [optional] threshold_slope (float): threshold for determining if potential is flat
                                        Default=1e-3.
    [optional] threshold_C (float): threshold for determining if potential is aligned
                                    Default=1e-3.                                        
    [optional] max_iter (int): max. no. of shifts to try. Default=20.
    [optional] vfile (str): vline .dat file. Default='vline-eV.dat'
    [optional] noplots (bool): do not generate plots. Default=False.
    [optional] allplots (bool): save all plots. Default=False.
    [optional] logfile (str): logfile to save output to 
    
    """

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(os.path.join(os.getcwd(), logfile))
    else:
        myLogger = logging.setup_logging()

    ## basic command to run sxdefectalign2d
    ## (kept as an argument list and executed without a shell, so paths
    ## containing spaces or shell metacharacters are passed through intact)
    command = [
        os.path.expanduser('~/sxdefectalign2d'),
        '--vasp',
        '--ecut',
        str(encut / 13.6057),  ## convert eV to Ry
        '--vref',
        vref,
        '--vdef',
        vdef
    ]

    ## initialize the range of shift values bracketing the optimal shift
    smin, smax = -np.inf, np.inf
    shift = 0.0
    shifting = 'right'
    done = False
    counter = -1

    time0 = time.time()
    while not done and counter < max_iter:
        counter += 1
        ## run sxdefectalign2d with --shift <shift>
        ## (stdout is appended to the logfile when one was requested)
        _run_sxdefectalign2d(
            command + ['--shift', str(shift), '--onlyProfile'],
            outfile=logfile,
            mode='a')

        ## read in the potential profiles from vline-eV.dat
        ## z  V^{model}  \DeltaV^{DFT}  V^{sr}
        data = np.loadtxt(vfile)

        ## plot potential profiles
        if not noplots:
            plt.figure()
            plt.plot(data[:, 0], data[:, 2], 'r', label=r'$V_{def}-V_{bulk}$')
            plt.plot(data[:, 0], data[:, 1], 'g', label=r'$V_{model}$')
            plt.plot(data[:, 0],
                     data[:, -1],
                     'b',
                     label=r'$V_{def}-V_{bulk}-V_{model}$')
            plt.xlabel("distance along z axis (bohr)")
            plt.ylabel("potential (eV)")
            plt.xlim(data[0, 0], data[-1, 0])
            plt.legend()
            if allplots:
                plt.savefig(os.getcwd() + '/alignment_%d.png' % counter)
            else:
                plt.savefig(os.getcwd() + '/alignment.png')
            plt.close()

        ## assumes that the slab is in the center of the cell vertically!
        ## select datapoints corresponding to 2 bohrs at the top and bottom of the supercell
        ## (i.e. a total of 4 bohrs in the middle of vacuum)
        z1 = np.min([i for i, z in enumerate(data[:, 0]) if z > 2.])
        z2 = np.min(
            [i for i, z in enumerate(data[:, 0]) if z > (data[-1, 0] - 2.)])

        ## fit straight lines through each subset of datapoints
        m1, C1 = np.polyfit(data[:z1, 0], data[:z1, -1], 1)
        m2, C2 = np.polyfit(data[z2:, 0], data[z2:, -1], 1)
        myLogger.debug("Slopes: %.8f %.8f; Intercepts: %.8f %.8f" %
                       (m1, m2, C1, C2))

        ## check the slopes and intercepts of the lines
        ## and shift the charge along z until the lines are flat
        if (abs(m1) < threshold_slope and abs(m2) < threshold_slope
                and abs(C1 - C2) < threshold_C):
            done = True
            break
        elif m1 * m2 < 0:
            ## slopes have opposite signs: nudge in the last direction taken
            myLogger.info("undetermined...make a tiny shift and try again")
            if shifting == 'right':
                shift += 0.01
            else:
                shift -= 0.01
            myLogger.info("try shift = %.8f" % shift)
        elif (m1 + m2) * np.sign(q) > 0:
            ## current shift is a lower bound: step right, or bisect once
            ## an upper bound is known
            smin = shift
            if smax == np.inf:
                shift += 1.0
            else:
                shift = (smin + smax) / 2.0
            shifting = 'right'
            myLogger.debug("optimal shift is in [%.8f, %.8f]" % (smin, smax))
            myLogger.info("shift charge in +z direction; try shift = %.8f" %
                          shift)
        elif (m1 + m2) * np.sign(q) < 0:
            ## current shift is an upper bound: step left, or bisect once
            ## a lower bound is known
            smax = shift
            if smin == -np.inf:
                shift -= 1.0
            else:
                shift = (smin + smax) / 2.0
            shifting = 'left'
            myLogger.debug("optimal shift is in [%.8f, %.8f]" % (smin, smax))
            myLogger.info("shift charge in -z direction; try shift = %.8f" %
                          shift)
        else:
            ## (m1 + m2) * sign(q) is zero or NaN (e.g. q == 0): no branch
            ## would change the shift, so every further iteration would just
            ## repeat the identical run until max_iter is exhausted
            myLogger.warning(
                "cannot decide on a shift direction (shift = %.8f); giving up"
                % shift)
            break

    if done:
        C_ave = (C1 + C2) / 2
        myLogger.info("DONE! shift = %.8f & alignment correction = %.8f" %
                      (shift, C_ave))
        ## run sxdefectalign2d with --shift <shift> -C <C_ave> > correction
        _run_sxdefectalign2d(
            command + ['--shift', str(shift), '-C', str(C_ave)],
            outfile='correction',
            mode='w')
    else:
        myLogger.info("Could not find optimal shift after %d tries :(" %
                      max_iter)

    myLogger.debug("Total time taken (s): %.2f" % (time.time() - time0))


def _run_sxdefectalign2d(command, outfile=None, mode='w'):
    """
    Run an sxdefectalign2d command (list of arguments) without a shell.

    command (list of str): executable followed by its arguments
    [optional] outfile (str): file receiving the program's stdout; stdout is
                              inherited from this process when None
    [optional] mode (str): open mode for outfile, 'w' (overwrite) or 'a' (append)

    The exit status is not checked, matching the previous os.system call.
    """
    import subprocess

    if outfile is None:
        subprocess.run(command)
    else:
        with open(outfile, mode) as fh:
            subprocess.run(command, stdout=fh)
Example #4
0
def apply_all(dir_def,
              dir_ref,
              eps_slab=None,
              d_slab=None,
              dbentry=None,
              functional="GGA",
              encut=520,
              soc=False,
              logfile=None):
    """
    Apply sxdefectalign2d correction to all charged defect calculations.

    For every dir_def/<charge>/<supercell>/<vacuum>/ folder (charge != 'charge_0')
    a SPHInX input file is generated and alignment_correction_2d.calc is run
    inside the folder's 'correction' subdirectory, using the LOCPOT of the
    matching dir_ref/charge_0/<supercell>/<vacuum>/ folder as reference.
    The working directory is changed while processing and restored on exit.
    
    dir_def (str): path to the main defect directory
    dir_ref (str): path to the pristine reference directory
    [optional] eps_slab (float): ave. slab dielectric constant (supply this or dbentry)
    [optional] d_slab (float): slab thickness in Angstroms (supply this or dbentry)
    [optional] dbentry (str): path to the relevant database entry .json file
                              (supply this or eps_slab and d_slab)                          
    [optional] functional (str): functional used for this set of calculations. Default=GGA.
    [optional] encut (int): cutoff energy (eV). Default=520.
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
    [optional] logfile (str): logfile to save output to                              

    Raises FileNotFoundError if dbentry is given but does not exist, or if a
    defectproperty.json / LOCPOT file is missing in a calculation folder.

    """

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    ## if eps_slab and d_slab are directly provided
    if eps_slab and d_slab:
        myLogger.info("eps_slab: %.2f ; d_slab: %.2f" % (eps_slab, d_slab))

    elif dbentry:
        ## extract the eps_slab and d_slab from the relevant dbentry file
        if not os.path.exists(dbentry):
            ## if can't find a dbentry file
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                    dbentry)
        myLogger.info("Using slab properties from " + dbentry)
        with open(dbentry, 'r') as f:
            mater = json.load(f)
        eps_slab = mater[functional]["eps_ave"]
        d_slab = mater[functional]["d_slab"]
        myLogger.info("eps_slab: %.2f ; d_slab: %.2f" % (eps_slab, d_slab))

    else:
        ## NOTE(review): execution continues with whatever eps_slab/d_slab were
        ## passed in (possibly None), as before -- confirm this is intended
        myLogger.info(
            "Insufficient information provided about the slab dielectric profile"
        )

    ## the loop below changes the working directory, so relative input paths
    ## must be made absolute first or they stop resolving after the first chdir
    dir_def = os.path.abspath(dir_def)
    dir_ref = os.path.abspath(dir_ref)
    cwd0 = os.getcwd()

    try:
        qs = osutils.listdironly(os.path.join(dir_def, ''))
        for q in [qi for qi in qs if qi != 'charge_0']:
            for cell in osutils.listdironly(os.path.join(dir_def, q, '')):
                for vac in osutils.listdironly(
                        os.path.join(dir_def, q, cell, '')):
                    folder = os.path.join(dir_def, q, cell, vac, '')
                    folder_ref = os.path.join(dir_ref, 'charge_0', cell, vac,
                                              '')

                    if soc:
                        folder = os.path.join(folder, 'dos', '')
                        folder_ref = os.path.join(folder_ref, 'dos', '')
                    if (os.path.exists(folder)
                            and 'restart' in osutils.listdironly(folder)):
                        folder = os.path.join(folder, 'restart', '')

                    ## check if defectproperty.json file is present in the
                    ## calculation folder
                    defprop = os.path.join(folder, 'defectproperty.json')
                    if not os.path.exists(defprop):
                        raise FileNotFoundError(errno.ENOENT,
                                                os.strerror(errno.ENOENT),
                                                defprop)

                    os.chdir(folder)
                    ## generate SPHInX input file
                    SPHInX_input_file.generate(eps_slab, d_slab)

                    ## both LOCPOT files are needed; report the defect one first
                    locpot_def = os.path.join(folder, 'LOCPOT')
                    locpot_ref = os.path.join(folder_ref, 'LOCPOT')
                    for locpot in (locpot_def, locpot_ref):
                        if not os.path.exists(locpot):
                            raise FileNotFoundError(errno.ENOENT,
                                                    os.strerror(errno.ENOENT),
                                                    locpot)

                    os.chdir(os.path.join(folder, 'correction'))
                    myLogger.info(
                        "applying correction to calculations in %s" % folder)

                    ## apply correction
                    ## (charge is the integer after the last '_' in the
                    ## directory name)
                    alignment_correction_2d.calc(locpot_ref,
                                                 locpot_def,
                                                 encut,
                                                 int(q.split('_')[-1]),
                                                 allplots=True,
                                                 logfile='getalign.log')
    finally:
        ## do not leave the caller stranded in the last 'correction' folder
        os.chdir(cwd0)
Example #5
0
def setup(dir_def_main,qs,cells,vacs,functional='PBE',kppa=400,bulkref=False):

    """ 
    Generate input files for defect calculations.

    Creates dir_def_main/charge_<q>/<n1>x<n2>x<n3>/vac_<vac>/ for every
    combination of charge, supercell and vacuum, and generates POTCAR, POSCAR,
    INCAR, KPOINTS and a submission script inside each. The generators are
    called from within each folder, so the working directory is changed while
    running; on return it is dir_def_main.
    
    Parameters
    ----------
    dir_def_main (str): path to the main defect directory
    qs (list of ints): list of charge states
    cells (list of tuples of ints): list of [n1,n2,n3] supercell sizes
    vacs (list of ints): list of vacuum spacings
    [optional] functional (str): type of function: PBE(default)/SCAN+rVV10
    [optional] kppa (int): kpoint density per reciprocal atom. Default=400 pra.
    [optional] bulkref (bool): write files for reference calculations? Default=False.

    Raises FileNotFoundError if the initdef file (only needed when bulkref is
    False), the POTCAR, or any POSCAR_vac_<vac> is missing from dir_def_main.
    
    """

    ## set up logging
    myLogger = logging.setup_logging()

    ## the loops below change the working directory; a relative dir_def_main
    ## would stop resolving after the first chdir, so pin it down now
    dir_def_main = os.path.abspath(dir_def_main)


    ## check if initdef file is present in dir_def_main ?
    file_initdef = None
    if not bulkref:
        if osutils.check_file_exists(dir_def_main,"initdef"):
            ## if several entries match, the last one listed is used
            for fname in os.listdir(dir_def_main):
                if fname.startswith("initdef"):
                    file_initdef = fname
        if file_initdef is None:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                    os.path.join(dir_def_main,"initdefect.json"))


    ## check if POTCAR is present in dir_def_main ?
    pot_file = os.path.join(dir_def_main,"POTCAR")
    if not os.path.exists(pot_file):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), pot_file)


    ## check if relevant POSCARs are present in dir_def_main ?
    for vac in vacs:
        pos_file = os.path.join(dir_def_main,"POSCAR_vac_%d"%vac)
        if not os.path.exists(pos_file):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), pos_file)


    for q in qs:
        ## create charge subdirectory
        dir_q = os.path.join(dir_def_main,"charge_%d"%q)
        os.makedirs(dir_q,exist_ok=True)

        for cell in cells:
            ## create supercell subdirectory
            cell_str = "%sx%sx%s"%(cell[0],cell[1],cell[2])
            dir_cell = os.path.join(dir_q,cell_str)
            os.makedirs(dir_cell,exist_ok=True)

            for vac in vacs:
                ## create and enter vacuum subdirectory
                dir_vac = os.path.join(dir_cell,"vac_%d"%vac)
                os.makedirs(dir_vac,exist_ok=True)
                os.chdir(dir_vac)

                myLogger.info("generating input files in "+os.getcwd())

                ## cp POTCAR from dir_main
                shutil.copyfile(pot_file,
                                os.path.join(os.getcwd(),"POTCAR"))

                ## generate defect POSCAR
                if bulkref:
                    gen_defect_supercell.generate(dir_def_main,'dummy.json',
                                                  q,cell,vac,
                                                  bulkref=True)
                else:
                    gen_defect_supercell.generate(dir_def_main,file_initdef,
                                                  q,cell,vac)

                ## generate INCAR
                incar.generate(q=q,functional=functional)

                ## generate KPOINTS
                kpoints.generate_uniform(kppa=kppa)

                ## generate submission script 
                ## with the default settings nodes=1, cpus=32, mem=2048, time=24:00:00
                ## PLEASE CHANGE EMAIL SETTINGS!!!
                if bulkref:
                    submit.generate(jobname='ref_%s_%d'%(cell_str,vac),email='*****@*****.**')
                else:
                    submit.generate(email='*****@*****.**')


    os.chdir(dir_def_main)
def calc(main_system,
         dir_db,
         dir_def,
         xlfile,
         mu_limit,
         functional="GGA",
         soc=False,
         logfile=None):
    """ 
    Evaluate uncorrected defect formation energy.

    For every sheet of the excel file (one sheet per charge state, named
    'charge_<q>') the chemical potentials and VBM are looked up in the database
    .json entries and written into the sheet, followed by
        E_form = E_def - E_bulk - sum(n_i*mu_i) + q*VBM
    stored in column 'E_form_corr' for 'charge_0' and 'E_form_uncorr' otherwise.
    The excel file is rewritten in place.
    
    Parameters
    ----------
    main_system (str): the main system e.g. MoS2, WSe2
    dir_db (str): path to the database directory
    dir_def (str): path to the defect directory containing the excel, initdefect.json files
    xlfile (str): excel filename to read/save the dataframe from/to
    mu_limit (str): which chemical potential limit to consider, e.g. Mo-rich
    [optional] functional (str): functional used for this set of calculations. Default=GGA.
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
                           (currently not used by this function)
    [optional] logfile (str): logfile to save output to

    Raises FileNotFoundError if no initdef file is found in dir_def.
    
    """
    import errno

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    xlpath = os.path.join(dir_def, xlfile)

    ## load dict of dataframes (one per sheet) from excel file
    df = pd.read_excel(xlpath, sheet_name=None)

    ## find initdef.json file (if several entries match, the last one is used)
    file_initdef = None
    if osutils.check_file_exists(dir_def, "initdef"):
        for fname in os.listdir(dir_def):
            if fname.startswith("initdef"):
                file_initdef = fname
    if file_initdef is None:
        ## without it species_list/ni_list are undefined and everything
        ## below would fail with a NameError
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                os.path.join(dir_def, "initdefect.json"))

    ##  get species i and ni from initdefect.json file
    with open(os.path.join(dir_def, file_initdef), 'r') as f:
        initdef = json.load(f)
    species_list, ni_list = [], []
    for defect in initdef:
        species, ni = get_i_ni(initdef[defect])
        species_list += species
        ni_list += ni
    myLogger.info("Atoms added/removed: " + \
                 ", ".join([str(n)+"*"+i for n,i in zip(ni_list,species_list)]))

    for q in df:

        ## get the relevant chemical potentials
        found_mu = True
        for species in species_list:
            mu = "mu_%s_%s" % (species, mu_limit)

            ## check if the relevant database entry exists
            dbentry_file = "%s.json" % species
            if osutils.check_file_exists(dir_db, dbentry_file):
                with open(os.path.join(dir_db, dbentry_file), 'r') as f:
                    mater = json.load(f)
                ## search for appropriate mu entry
                ## (falls back to the plain "mu" key; last matching key wins)
                mu_key = "mu"
                for key in mater[functional].keys():
                    if key.startswith("mu_%s" % mu_limit):
                        mu_key = key
                myLogger.info("Using chemical potential " + mu_key + " from " +
                              dbentry_file)
                ## input the corresponding mus into the dataframe
                df[q][mu] = mater[functional][mu_key]

            else:
                myLogger.info("Cannot find the database entry for " + species)
                found_mu = False

        ## get the VBMs
        ## check if the relevant database entry exists
        dbentry_file = "%s.json" % (main_system)
        if not osutils.check_file_exists(dir_db, dbentry_file):
            myLogger.info("Cannot find the database entry for " + main_system)
            continue

        with open(os.path.join(dir_db, dbentry_file), 'r') as f:
            mater = json.load(f)

        ## input the VBMs corresponding to each vacuum spacing into the dataframe
        for rowind in df[q].index.values:
            vac = df[q].loc[rowind].vacuum
            if vac in mater[functional].keys():
                df[q].at[rowind, 'VBM'] = mater[functional][vac]["VBM"]
            else:
                ## str() so that a non-string cell value cannot break the message
                myLogger.info("Cannot find the VBM entry for " + str(vac))

        ## Finally, we can compute the uncorrected defect formation energy:
        ## Eform = Etot(def) - Etot(pristine) - sum(n_i*mu_i) + q*E_Fermi
        if found_mu:
            ## proceed if chemical potentials and VBMs have been correctly entered
            sum_mu = 0
            for n, species in zip(ni_list, species_list):
                mu = "mu_%s_%s" % (species, mu_limit)
                sum_mu += n * df[q][mu]
            ## the neutral defect needs no charge correction, so its value
            ## is stored directly under the "corrected" column name
            if q == 'charge_0':
                colname = "E_form_corr"
            else:
                colname = "E_form_uncorr"
            ## charge is the integer after the last '_' in the sheet name
            df[q][colname] = df[q].loc[:,'E_def'] \
                             - df[q].loc[:,'E_bulk'] \
                             - sum_mu \
                             + int(q.split("_")[-1]) * df[q].loc[:,'VBM']

    ## write the updated excel file
    ## (context manager saves and closes; ExcelWriter.save() no longer exists
    ## in pandas >= 2.0)
    with pd.ExcelWriter(xlpath) as writer:
        for q in df:
            df[q].to_excel(writer, sheet_name=q, index=False)
def parse(path_def, path_ref, xlfile, soc=False, logfile=None):
    """ 
    Parse total energies from OUTCARs and save into a pandas dataframe.

    Energies are read from the vasprun.xml files found under
    path_def/<charge>/<supercell>/<vacuum>/ (and the matching
    path_ref/charge_0/... folders for the pristine reference). One sheet per
    charge directory is written to path_def/xlfile. The sheets of charged
    defects are copies of the neutral ('charge_0') sheet in which 'E_def' is
    replaced by the charged-defect energy, so without a 'charge_0' directory
    they contain the column headers only.
    
    Parameters
    ----------
    path_def (str): path to the directory containing all the defect output files
    path_ref (str): path to the directory containing all the reference output files
    xlfile (str): excel filename to save the dataframe to
    [optional] soc (bool): whether or not to look in soc(dos) subdirectory. Default=False.
    [optional] logfile (str): logfile to save output to
    
    """

    ## set up logging
    if logfile:
        myLogger = logging.setup_logging(logfile)
    else:
        myLogger = logging.setup_logging()

    qs = osutils.listdironly(path_def)

    ## sheets are collected here and written in one go at the end, so that a
    ## failure while parsing cannot leave a half-written excel file behind
    sheets = {}

    time0 = time.time()

    ## set up dataframe for neutral defect first
    ## (defined unconditionally: the charged-defect loop below copies it)
    df0 = pd.DataFrame(
        columns=['vacuum', 'supercell', 'N', '1/N', 'E_def', 'E_bulk'])

    if 'charge_0' not in qs:
        myLogger.warning("can't find output files for neutral defect")
    else:
        for cell in osutils.listdironly(os.path.join(path_def, 'charge_0')):
            for vac in osutils.listdironly(
                    os.path.join(path_def, 'charge_0', cell)):
                myLogger.info("parsing neutral %s %s" % (cell, vac))

                subdir_def = os.path.join(path_def, 'charge_0', cell, vac)
                subdir_ref = os.path.join(path_ref, 'charge_0', cell, vac)

                if soc:
                    ## the soc directory name found for the defect is used
                    ## for the reference as well
                    dir_soc = _find_soc_subdir(subdir_def, myLogger)
                    if dir_soc is not None:
                        subdir_def = os.path.join(subdir_def, dir_soc)
                        subdir_ref = os.path.join(subdir_ref, dir_soc)
                        myLogger.info("parsing soc subdirectory")

                ## only the defect folder is checked for a restart
                subdir_def = _descend_into_restart(subdir_def, myLogger)

                vr_file = os.path.join(subdir_def, 'vasprun.xml')
                vr_ref_file = os.path.join(subdir_ref, 'vasprun.xml')

                if not os.path.exists(vr_file):
                    myLogger.warning("%s file does not exist" % vr_file)
                    continue
                if not os.path.exists(vr_ref_file):
                    myLogger.warning("%s file does not exist" % vr_ref_file)
                    continue

                ## total number of atoms in the reference supercell
                natoms = np.sum(
                    Poscar.from_file(os.path.join(subdir_ref,
                                                  'POSCAR')).natoms)
                vr = Vasprun(vr_file)
                vr_ref = Vasprun(vr_ref_file)

                if not vr.converged:
                    myLogger.warning(
                        "VASP calculation in %s may not be converged" %
                        subdir_def)

                ## append one row
                df0.loc[len(df0)] = [
                    vac, cell, natoms, 1 / natoms, vr.final_energy,
                    vr_ref.final_energy
                ]

        df0.sort_values(['vacuum', 'N'], inplace=True)
        sheets['charge_0'] = df0

    ## modify dataframe for charged defects
    for q in [qi for qi in qs if qi != 'charge_0']:
        df = df0.copy(deep=True)

        for cell in osutils.listdironly(os.path.join(path_def, q)):
            for vac in osutils.listdironly(os.path.join(path_def, q, cell)):
                myLogger.info("parsing %s %s %s" % (q, cell, vac))

                subdir_def = os.path.join(path_def, q, cell, vac)

                if soc:
                    dir_soc = _find_soc_subdir(subdir_def, myLogger)
                    if dir_soc is not None:
                        subdir_def = os.path.join(subdir_def, dir_soc)
                        myLogger.info("parsing soc subdirectory")

                subdir_def = _descend_into_restart(subdir_def, myLogger)

                vr_file = os.path.join(subdir_def, 'vasprun.xml')

                if not os.path.exists(vr_file):
                    myLogger.warning("%s file does not exist" % vr_file)
                    continue

                vr = Vasprun(vr_file)

                if not vr.converged:
                    myLogger.warning(
                        "VASP calculation in %s may not be converged" %
                        subdir_def)

                ## overwrite the neutral energy in the matching row
                df.loc[(df['vacuum'] == vac) & (df['supercell'] == cell),
                       'E_def'] = vr.final_energy

        sheets[q] = df

    ## write all sheets
    ## (context manager saves and closes; ExcelWriter.save() no longer exists
    ## in pandas >= 2.0)
    if sheets:
        with pd.ExcelWriter(os.path.join(path_def, xlfile)) as writer:
            for name, sheet in sheets.items():
                sheet.to_excel(writer, sheet_name=name, index=False)
    else:
        myLogger.warning("no charge directories found; nothing written")

    myLogger.debug("Total time taken (s): %.2f" % (time.time() - time0))


def _find_soc_subdir(subdir_def, myLogger):
    """
    Return the name of the single subdirectory of subdir_def containing "soc".

    Returns None (after logging why) when there is no such subdirectory or
    more than one candidate.
    """
    candidates = [
        dirname for dirname in osutils.listdironly(subdir_def)
        if "soc" in dirname
    ]
    if len(candidates) == 1:
        return candidates[0]
    if not candidates:
        myLogger.info("cannot find a soc subdirectory")
    else:
        myLogger.info("multiple possible soc subdirectories found")
    return None


def _descend_into_restart(subdir, myLogger):
    """
    Return subdir/restart if subdir exists and contains a 'restart' directory,
    otherwise return subdir unchanged.
    """
    if os.path.exists(subdir) and 'restart' in osutils.listdironly(subdir):
        myLogger.info("parsing restart subdirectory")
        return os.path.join(subdir, 'restart')
    return subdir