Example #1
def readStructs(datadir, shallow=True, name="output"):
    """
        Currently designed for output from single layer directory trees.
        Reads in final adsorption geometries and energy data, returns dataframe with geometry and energy data

        Input:
            datadir: string that points to directory containing the following:
                - convergence: each line i has convergence status of run i
                - energies: each line i has total energy and ads energy from run i
                - output{indices}.gen: final geometries for each index
                
            slabEnergy: energy of slab
            adsorbateEnergy: energy of the adsorbate in the system

        Returns:
            output: pd Dataframe with:
                - index: indices for runs that worked
                - geometry: final geometry of run
                - total energy: raw energy from file
                - adsorption energy: energy as adjusted by adsorbate_energy
    """
    geometries = {}
    if shallow:
        pattern = r"{}(\d+)\.gen".format(name)
    else:
        pattern = r"{}(\d+-\d+)\.gen".format(name)
    files = os.listdir(datadir)

    if "energies" in files and "convergence" in files:
        convergence = pd.read_csv(datadir + "convergence", header=None)
        energies = pd.read_csv(datadir + "energies", header=None)
        output = pd.concat([energies, convergence], axis=1)
        output.columns = ["E", "E_ads", "conv"]

        for i in files:
            key = re.search(pattern, i)
            if key:
                if shallow:
                    key = int(key.group(1))
                else:
                    key = key.group(1)
                geometries[key] = gen.read_gen(datadir + i)
        output['geom'] = pd.Series(geometries)

        output = output[output['conv'] == "Geometry converged"]
        output = output.drop("conv", axis=1)

    else:
        for i in files:
            key = re.search(pattern, i)
            if key:
                if shallow:
                    key = int(key.group(1))
                else:
                    key = key.group(1)
                geometries[key] = gen.read_gen(datadir + i)
        output = pd.DataFrame(pd.Series(geometries))
        output.columns = ['geom']
    return output
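A hedged usage sketch for readStructs follows; the data directory is hypothetical, and the call assumes the same imports as the function itself (os, re, pandas as pd, ase.io.gen).

# Hypothetical layout: runs/batch1/convergence, runs/batch1/energies,
# runs/batch1/output*.gen. Note that datadir is concatenated directly with
# filenames, so it should end with a separator.
results = readStructs("runs/batch1/", shallow=True, name="output")
print(results[['E', 'E_ads']].describe())   # energies of converged runs
first_geom = results['geom'].iloc[0]        # an ase.Atoms object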
Example #2
def getslabs(data, directory, useInputs=False):
    """
    Utility for getting and writing slab files from readData (utils.py) function
    data is the df from readData function or any df with (struct, in) and (struct, out) columns
    """

    if useInputs:
        slabSource = data['struct']['in']
    else:
        slabSource = data['struct']['out']

    dataDir = directory
    slabs = {}

    # to generate slabs
    for key, value in slabSource.items():  # .iteritems() was removed in pandas 2.0
        slabs[key] = getslab(value)

    for key, value in slabs.items():
        gen.write_gen(dataDir + "slab{}.gen".format(key), value)

    # to read slabs
    for key in data.index:
        slabs[key] = gen.read_gen(dataDir + "slab{}.gen".format(key))

    if useInputs:
        data.loc[:, ('struct', 'inslab')] = pd.Series(slabs)
    else:
        data.loc[:, ('struct', 'outslab')] = pd.Series(slabs)
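A hedged usage sketch for getslabs; readData and getslab are helpers referenced but not shown here, and the paths are hypothetical.

data = readData("runs/batch1/")            # df with ('struct', 'in'/'out') columns of Atoms
getslabs(data, "slabs/", useInputs=False)  # writes slabs/slab{key}.gen and adds an
print(data['struct']['outslab'].head())    # ('struct', 'outslab') column to the df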
def viewStructs(name, directory, kind = 'gen'):
    """
    View collection of structures as a "trajectory"

    Args:
        - name (str): substring unique to structures (.gen, POSCAR, slab, etc)
        - directory (str): Directory where the structures live
        - kind (str): kind of structure file, one of (vasp, gen)
        
    Opens viewer with loaded trajectory (if remote, need X server)
    """
    geometries = []
    files = os.listdir(directory)

    if kind == 'gen':
        pattern = r"{}.*.gen".format(name)
    elif kind == 'vasp':
        pattern = r"{}".format(name)
    else:
        raise ValueError("file kind must be from (vasp, gen)")

    for i in files:
        key = re.search(pattern, i)

        if key:
            if kind == 'gen':
                geometries.append(gen.read_gen(directory + i))
            elif kind == 'vasp':
                geometries.append(vasp.read_vasp(directory + i))
            else:
                raise ValueError("file kind must be from (vasp, gen)")
    view(geometries)
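A hedged usage sketch for viewStructs; the directory and name substrings are hypothetical, and the directory string must end with a separator because it is concatenated directly with filenames.

viewStructs("output", "runs/batch1/", kind='gen')    # all files matching output*.gen
viewStructs("CONTCAR", "runs/batch1/", kind='vasp')  # all files containing CONTCAR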
Example #4
def main(basename, visualize=True):
    """
    Perform ML-based isotherm seeding.

    Args:
        basename: name of base slab
        visualize: if True, open an ASE viewer with the predicted grid
    """

    # load z prediction and E_ads prediction models (pickled KRR models)
    with open('models/zmodel.pkl', 'rb') as f:
        zmodel = pickle.load(f)

    with open('models/Emodel.pkl', 'rb') as f:
        Emodel = pickle.load(f)

    # load base slab, remove extraneous atoms, and wrap
    base = gen.read_gen(basename)
    del base[[atom.index for atom in base if atom.symbol in ['He', 'Ar']]]
    base.wrap()

    # generate regular grid based on cell parameters of slab
    a, b, c = base.cell
    a, b, c = np.linalg.norm(a), np.linalg.norm(b), np.linalg.norm(c)
    npoints = 20
    apoints = np.linspace(0, a, npoints)  # regular spacing
    bpoints = np.linspace(0, b, npoints)  # regular spacing

    # place He atoms in grid points
    gridpoints = []
    for apoint in apoints:
        for bpoint in bpoints:
            newstruct = base.copy()
            zhat = predictz(newstruct, apoint, bpoint)
            newstruct.append(Atom('He', position=(apoint, bpoint, zhat)))
            gridpoints += [newstruct]

    # generate pd df with data
    gridpoints = pd.Series(gridpoints)
    gridpoints = pd.DataFrame({'geom': gridpoints})
    gridpoints = pd.concat([gridpoints, getSOAPs(gridpoints['geom'])], axis=1)

    # data matrix for ML
    X = pd.DataFrame(gridpoints['SOAP'].to_list(), index=gridpoints.index)

    gridpoints['predE'] = Emodel.predict(X)

    # build a visualization copy: all He probe atoms appended, with predicted
    # energies stored as initial charges (useful for color-coding in the viewer)
    visbase = base.copy()
    for geom in gridpoints['geom']:
        visbase.append(Atom("He", position=geom[-1].position))
    charges = np.append(np.zeros(len(base)), gridpoints['predE'])
    visbase.set_initial_charges(charges)

    # TODO adaptive sampling portion
    if visualize:
        view(visbase)

        print(
            "pearson r:",
            pearsonr([geom[-1].position[2] for geom in gridpoints['geom']],
                     gridpoints['predE']))
Example #5
File: dftb.py Project: btodac/ase
    def read_results(self):
        """ all results are read from results.tag file
            It will be destroyed after it is read to avoid
            reading it once again after some runtime error """

        with open(os.path.join(self.directory, 'results.tag'), 'r') as myfile:
            self.lines = myfile.readlines()

        self.atoms = self.atoms_input
        charges, energy = self.read_charges_and_energy()
        if charges is not None:
            self.results['charges'] = charges
        self.results['energy'] = energy
        forces = self.read_forces()
        self.results['forces'] = forces
        self.mmpositions = None
        # Get positions and pbc's if there
        if self.parameters['Driver_OutputPrefix']:
            genname = self.parameters['Driver_OutputPrefix'] + '.gen'
        else:
            genname = 'geo_end.gen'
        output = read_gen(os.path.join(self.directory, genname))
        self.results['positions'] = output.get_positions()
        if output.get_pbc() is not None:
            self.results['pbc'] = output.get_pbc()
        # stress stuff begins
        sstring = 'stress'
        have_stress = False
        stress = list()
        for iline, line in enumerate(self.lines):
            if sstring in line:
                have_stress = True
                start = iline + 1
                end = start + 3
                for i in range(start, end):
                    cell = [float(x) for x in self.lines[i].split()]
                    stress.append(cell)
        if have_stress:
            stress = -np.array(stress) * Hartree / Bohr**3
            self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]]
        # stress stuff ends

        # eigenvalues and fermi levels
        fermi_levels = self.read_fermi_levels()
        if fermi_levels is not None:
            self.results['fermi_levels'] = fermi_levels

        eigenvalues = self.read_eigenvalues()
        if eigenvalues is not None:
            self.results['eigenvalues'] = eigenvalues

        # calculation was carried out with atoms written in write_input
        os.remove(os.path.join(self.directory, 'results.tag'))
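The Voigt reordering above, stress.flat[[0, 4, 8, 5, 2, 1]], maps the row-major 3x3 stress tensor onto the 6-component (xx, yy, zz, yz, xz, xy) order that ASE uses for results['stress']; a minimal numpy check of the index mapping:

import numpy as np

full = np.array([['xx', 'xy', 'xz'],
                 ['yx', 'yy', 'yz'],
                 ['zx', 'zy', 'zz']])
print(full.flat[[0, 4, 8, 5, 2, 1]])  # -> ['xx' 'yy' 'zz' 'yz' 'xz' 'xy']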
Example #6
def generate_band_structure_inputs(scc_to_band_directory: dict):
    """ Given a list of directories with converged calculations, generate band structure inputs
    for DFTB+ TB Lite.
    """
    for scc_directory, bs_directory in scc_to_band_directory.items():
        # Make directory if it does not exist
        Path(bs_directory).mkdir(parents=True, exist_ok=True)

        # Copy the charges and structure
        for file in ['charges.bin', 'geometry.gen']:
            shutil.copyfile(os.path.join(scc_directory, file), os.path.join(bs_directory, file))

        # Generate a new input file, for band structure
        atoms: Atoms = read_gen(os.path.join(bs_directory, 'geometry.gen'))
        input_xml_str = generate_band_structure_input(atoms.get_cell(), 'GFN1-xTB')

        with open(os.path.join(bs_directory, 'dftb_in.hsd'), 'w') as fid:
            fid.write(input_xml_str)
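A hedged usage sketch for generate_band_structure_inputs; the directory names are hypothetical. Each key holds a converged SCC calculation (charges.bin, geometry.gen), and each value is the directory in which the band-structure input is written.

scc_to_band = {
    'scc/si_bulk': 'bands/si_bulk',
    'scc/gaas': 'bands/gaas',
}
generate_band_structure_inputs(scc_to_band)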
Example #7
Includes:
    - mef 
    - cf4 
    - amorphous 
    - xtl_n 
    - xtl_si 
    - xtl2x2 
    - xtl2x2_sifterm 
    - heavy_bomb 
    - bulk222 
    - annealed 
"""
import os
from ase.io import gen, vasp
from inspect import getsourcefile

##############
# structures #
##############
path = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0)))
mef = vasp.read_vasp(path + "/reference_files/CONTCAR_mef")
cf4 = vasp.read_vasp(path + "/reference_files/CONTCAR_cf4")
amorphous = gen.read_gen(path + "/reference_files/amorphous_base.gen")
xtl_n = vasp.read_vasp(path + "/reference_files/CONTCAR_nrich")
xtl_si = vasp.read_vasp(path + "/reference_files/CONTCAR_sirich")
xtl2x2 = gen.read_gen(path + "/reference_files/2x2xtl.gen")
xtl2x2_sifterm = gen.read_gen(path + "/reference_files/2x2xtl_sifterm.gen")
heavy_bomb = vasp.read_vasp(path + "/reference_files/CONTCAR_heavy_bombard")
bulk222 = vasp.read_vasp(path + "/reference_files/CONTCAR_222bulk")
annealed = vasp.read_vasp(path + "/reference_files/CONTCAR_annealed_unitcell")
Example #8
def main(
    datadir = "temp/", #data files, structured as datadir/output$i-$j.gen and datadir/velos$i-$j
    outputdir = "temp.new/",  #files for output
    hbondrange = 6, #offset from surface corresponding to Hbond range
    zmincutoff = 0.1, #somewhat arbitrary value to get rid of atoms that have gone into bulk
    output_geom_name = "output",  #prefix for output geometry files
    output_velos_name = "velos" #prefix for output velocity files
    ):

    ##############################
    ### Read in geometry files ###
    ##############################

    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)

    geometries = {}
    for i in os.listdir(datadir):
        if output_geom_name in i:
            key = re.search(r"\d+", i)
            if key:
                key = key.group(0)
                geometries[key] =  gen.read_gen(datadir + i)

    ##########################
    ### Read in velocities ###
    ##########################
    velos = dict()
    for i in os.listdir(datadir):
        if output_velos_name in i:
            key = re.search(r"\d+", i)
            if key:
                key = key.group(0)
                velos[key] = pd.read_csv(datadir + i, header = None, dtype = float, sep = r"\s+")


    ################
    ### trimming ###
    ################

    trimmedgeoms = dict()
    trimmedvelos = dict()

    removedspecies = dict()

    for key, geom in geometries.items(): 
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F':0, 'C':0}

        # construct graph 
        adjmat = Analysis(geom).adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i,j]:
                    g.addEdge(i,j)
        cc = g.connectedComponents()

        #identify slab, and max height of slab
        maingraph = np.array([i for i in cc if 0 in i][0])
        slab = geom[[atom.index for atom in geom if atom.index in maingraph]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange
        
        # isolate fragments and identify which to remove
        fragGraphs = [i for i in cc if 0 not in i]
        fragZs = [[geom[i].position[2] for i in frag] for frag in fragGraphs]
        removeFrag = [np.all(np.array(i) > zcutoff) or np.all(np.array(i) < zmincutoff) 
                for i in fragZs]
        atomsToRemove = [i for g,r in zip(fragGraphs, removeFrag) if r for i in g]
        #account for any atoms that have wrapped around through the top of the cell (lookin at you, H)
        atomsToRemove += [a.index for a in geom if a.z > geom.cell[2,2]] 

        for idx in atomsToRemove:
            removedatoms[geom[idx].symbol] += 1 #tally removed atoms by species
        
        geomcopy = geom.copy()
        del geomcopy[[atom.index for atom in geomcopy if atom.index in atomsToRemove]]
        
        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy
        trimmedvelos[key] = velos[key][[i not in atomsToRemove for i in np.arange(len(velos[key]))]]

        
    # collect all removed species series into a df and write as csv
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")

    #write 
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), 
            geom)
    for key, v in trimmedvelos.items():
        v.to_csv("%s%s%s.in" % (outputdir, output_velos_name, key), 
            sep = " ", index = False, header = False)
def main(cutoff,
         datapath,
         zmodelPath,
         EmodelPath,
         smallset,
         adsorbate='mef',
         npoints=20,
         outputpath='input.gen'):
    last = time.time()
    npoints = int(npoints)
    cutoff = float(cutoff)
    smallset = bool(int(smallset))
    adsorbate_types = {'mef': mef, 'cf4': cf4}
    ads = adsorbate_types[adsorbate]

    with open(zmodelPath, 'rb') as f:
        zmodel = pickle.load(f)
    with open(EmodelPath, 'rb') as f:
        Emodel = pickle.load(f)

    # read in calculated structure
    if "gen" in datapath:
        data = gen.read_gen(datapath)
    elif "CAR" in datapath:
        data = vasp.read_vasp(datapath)

    print('maxz: ', max([i.position[2] for i in data]))
    print('data read')
    now = time.time()
    print(now - last)
    last = now

    # obtain base slab
    base = getslab(data)
    # assume any adsorption influence enters via config, independent of Ar
    del base[[atom.index for atom in base if atom.symbol in ['He', 'Ar']]]
    base.wrap()

    print('base obtained')
    now = time.time()
    print(now - last)
    last = now
    # set up gridpoints with predicted z heights

    a, b, c = base.cell
    a, b, c = np.linalg.norm(a), np.linalg.norm(b), np.linalg.norm(c)
    apoints = np.linspace(0, a, npoints)
    bpoints = np.linspace(0, b, npoints)

    if smallset:
        species = ['Si', 'N', 'H', 'He']
    else:
        species = ["Si", "N", "H", "C", "F", "Ar", "He"]

    print(smallset, species)

    gridpoints = []
    for apoint in apoints:
        for bpoint in bpoints:
            newstruct = base.copy()
            print(newstruct)
            zhat = predictz(newstruct, apoint, bpoint, zmodel, species)
            newstruct.append(Atom('He', position=(apoint, bpoint, zhat)))
            gridpoints += [newstruct]

    print('gridpoints done')
    now = time.time()
    print(now - last)
    last = now

    gridpoints = pd.Series(gridpoints)
    gridpoints = pd.DataFrame({'geom': gridpoints})

    # add SOAP representation for gridpoint structs
    gridpoints = pd.concat(
        [gridpoints, getSOAPs(gridpoints['geom'], species=species)], axis=1)

    # create prediction matrix
    X = pd.DataFrame(gridpoints['SOAP'].to_list(), index=gridpoints.index)

    # predict energies, append to original df
    gridpoints['predE'] = Emodel.predict(X)

    # create 'visbase': struct with all He points included in one struct
    charges = np.append(np.zeros(len(base)), gridpoints['predE'])
    visbase = base.copy()
    for geom in gridpoints['geom']:
        visbase.append(Atom("He", position=geom[-1].position))
    visbase.set_initial_charges(charges)

    view(visbase)

    print('energy prediction done')
    now = time.time()
    print(now - last)
    last = now

    # assess gridpoints and place adsorbates
    gridpoints = gridpoints.sort_values(by='predE')
    gridpoints['xpos'] = [geom[-1].position[0] for geom in gridpoints['geom']]
    gridpoints['ypos'] = [geom[-1].position[1] for geom in gridpoints['geom']]
    gridpoints['zpos'] = [geom[-1].position[2] for geom in gridpoints['geom']]

    adsorbatePoints = []
    a = visbase.cell[0]
    b = visbase.cell[1]
    for _, row in gridpoints.iterrows():
        isclose = False
        point1 = np.array([row['xpos'], row['ypos']])
        for x, y, z in adsorbatePoints:
            for dispx in [-a, a * 0, a]:
                for dispy in [-b, b * 0, b]:
                    point2 = np.array([x, y])
                    point2 = point2 + dispx[:2] + dispy[:2]

                    if np.linalg.norm(point1 - point2) < cutoff:
                        isclose = True
        if not isclose:
            adsorbatePoints.append(np.append(point1, row['zpos']))

    print('placement done')
    now = time.time()
    print(now - last)
    last = now

    adsvisbase = base.copy()
    maxz = np.max([atom.position[2] for atom in adsvisbase])

    for point in adsorbatePoints:
        print(point[2])
        add_adsorbate(adsvisbase,
                      ads,
                      height=point[2] - maxz + 1,
                      position=(point[0], point[1]))

    gen.write_gen(outputpath, adsvisbase)

    view([data, base, adsvisbase])
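The nested displacement loops above implement a minimum-image distance check in the surface plane; an equivalent standalone helper (illustrative names, not part of the original code) could read:

import numpy as np

def far_from_existing(point_xy, accepted, a_vec, b_vec, cutoff):
    """Return True if point_xy is at least `cutoff` away (in the a-b plane)
    from every accepted (x, y, z) point, considering +/- one periodic image
    along the a and b cell vectors."""
    point_xy = np.asarray(point_xy, dtype=float)
    for x, y, _z in accepted:
        for na in (-1, 0, 1):
            for nb in (-1, 0, 1):
                image = np.array([x, y]) + na * a_vec[:2] + nb * b_vec[:2]
                if np.linalg.norm(point_xy - image) < cutoff:
                    return False
    return True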
Example #10
def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.

    Known formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    =========================  ===========

    """
    if isinstance(filename, str):
        p = filename.rfind('@')
        if p != -1:
            try:
                index = string2index(filename[p + 1:])
            except ValueError:
                pass
            else:
                filename = filename[:p]

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        format = filetype(filename)

    if format.startswith('gpw'):
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree

        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None

        atoms = Atoms(positions=positions, numbers=numbers, cell=cell, pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)

        return atoms

    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)

    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)

    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)

    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)

    if format == 'xyz':
        from ase.io.xyz import read_xyz
        return read_xyz(filename, index)

    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)

    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)

    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)

    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)

    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)

    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)

    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)

    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)

    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)

    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)

    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)

    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)

    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)

    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)

    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)

    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)

    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)

    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)

    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)

    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)

    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)

    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)

    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)

    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)

    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)

    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)

    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)

    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)

    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)

    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()

    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)

    if format == 'db':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)

    if format == 'lammps':
        from ase.io.lammps import read_lammps_dump
        return read_lammps_dump(filename, index)

    raise RuntimeError('File format descriptor ' + format + ' not recognized!')
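A hedged usage sketch of the '@' index syntax handled at the top of read(); the trajectory filename is hypothetical.

from ase.io import read

last = read('md.traj')         # default index=-1: last configuration
first = read('md.traj@0')      # equivalent to read('md.traj', index=0)
window = read('md.traj@0:3')   # a slice of configurations (returns a list)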
Example #11
def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.

    Known formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    =========================  ===========

    """
    if isinstance(filename, str):
        p = filename.rfind('@')
        if p != -1:
            try:
                index = string2index(filename[p + 1:])
            except ValueError:
                pass
            else:
                filename = filename[:p]

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        format = filetype(filename)

    if format.startswith('gpw'):
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree

        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None

        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)

        return atoms

    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)

    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)

    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)

    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)

    if format == 'xyz':
        from ase.io.xyz import read_xyz
        return read_xyz(filename, index)

    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)

    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)

    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)

    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)

    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)

    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)

    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)

    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)

    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)

    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)

    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)

    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)

    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)

    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)

    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)

    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)

    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)

    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)

    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)

    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)

    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)

    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)

    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)

    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)

    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)

    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)

    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)

    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)

    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)

    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()

    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)

    if format == 'db':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)

    if format == 'lammps':
        from ase.io.lammps import read_lammps_dump
        return read_lammps_dump(filename, index)

    raise RuntimeError('File format descriptor '+format+' not recognized!')
Example #12
def stichTrajectories(bombID, bombardments=1, prefix="geom.out"):
    """
    bombID (string): a string in the format of "{batch}-{sample}", eg "5-3"
    bombardments (arraylike): result of range() or np.arange() specifiying which bombardment events to use
    otherwise, should be an int, and the range (0, bombardments) will be used 
    filename (string): name of .xyz trajectory file in each replicate
    """
    startTime = time.time()
    bombardments = bombardments if hasattr(
        bombardments, '__iter__') else np.arange(bombardments)
    batch, sample = bombID.split("-")

    # Create an empty array to populate with the trajectory

    print("bombardments:", bombardments)
    nMaxAtoms = np.max([
        len(
            gen.read_gen("{}/{}/{}/{}/{}.gen".format(
                _b, step, batch, sample, prefix))) for _b in bombardments
        for step in ['bomb', 'quench', 'eq']
    ])

    # nMaxAtoms is guaranteed to correspond to some frame with an Ar in it
    # then, I basically generate nBombardments slots at the bottom of the df for the Ar atoms introduced
    nMaxAtoms = nMaxAtoms - 1 + len(bombardments)
    print("nMaxAtoms: ", nMaxAtoms)

    #create the final matrix that will actually represent stitched traj
    trajFrame = np.zeros((nMaxAtoms, 0), dtype=object)

    frameIdx = 0  #initialize global frame count

    for _b in bombardments:
        with open("{}/bomb/{}/{}/{}.xyz".format(_b, batch, sample,
                                                prefix)) as f1:
            with open("{}/quench/{}/{}/{}.xyz".format(_b, batch, sample,
                                                      prefix)) as f2:
                with open("{}/eq/{}/{}/{}.xyz".format(_b, batch, sample,
                                                      prefix)) as f3:
                    #                     print("{}/bomb/{}/{}/{}.xyz".format(_b, batch, sample, prefix))
                    _btemp = list(extxyz.read_extxyz(f1, index=slice(0, None)))
                    _qtemp = list(extxyz.read_extxyz(f2, index=slice(0, None)))
                    _etemp = list(extxyz.read_extxyz(f3, index=slice(0, None)))
                    trajList = _btemp + _qtemp + _etemp  #list form of this thing

                    if _b == bombardments[0]:
                        lastLen = len(
                            _btemp[0]
                        )  # initialize lastLen in the very first frame
                        #                     fragNames, fragIdxs = getFragsTraj(trajList)
                        newIndices = np.arange(lastLen)
                    for frame in trajList:
                        #                         try:
                        #                         print(frame)
                        frame.set_momenta(frame.get_masses().reshape(-1, 1) *
                                          frame.arrays['vel'])
                        #                         except:
                        #                             pass
                        #                             print("no velocity data")
                        trajFrame = np.hstack(
                            (trajFrame, np.zeros((nMaxAtoms, 1),
                                                 dtype=object)))
                        # check for changes in nAtoms present in frame; indicative of step change
                        if len(frame) != lastLen:
                            #                             print("entered if block for ", frameIdx)
                            newIndices = reindexTrajBreak(
                                prevFrame, frame, newIndices)
                            ArAdded = (frame[-1].symbol == "Ar" and np.all(
                                frame[-1].position != prevFrame[-1].position))
                            if ArAdded:
                                newIndices[-1] = nMaxAtoms - (
                                    len(bombardments) - _b)

                            lastLen = len(frame)
                        trajFrame[newIndices, frameIdx] = frame
                        prevFrame = frame
                        frameIdx += 1
#                     trajFrame[trajFrame == 0] = np.nan
    endTime = time.time()
    print("execution time (s) = {}".format((endTime - startTime)))
    return pd.DataFrame(trajFrame)
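A hedged usage sketch for stichTrajectories; the bombID and number of bombardment events are illustrative, and the directory layout must match the format strings used above.

traj_df = stichTrajectories("5-3", bombardments=3, prefix="geom.out")
print(traj_df.shape)  # (nMaxAtoms, total number of frames across bomb/quench/eq)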
Example #13
def main(file):
    view(gen.read_gen(file))
Example #14
def main(file, output):
    vasp.write_vasp(output, gen.read_gen(file), sort=True, vasp5=True)
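A hedged usage sketch: convert a DFTB+ GEN geometry (hypothetical filename) into a sorted VASP5 POSCAR.

main("geo_end.gen", "POSCAR")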
Example #15
def read(filename, index=None, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    VASP XDATCAR file          vasp_xdatcar
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    EON reactant.con file      eon
    Gromacs coordinates        gro
    Gaussian com (input) file  gaussian
    Gaussian output file       gaussian_out
    Quantum espresso in file   esp_in
    Quantum espresso out file  esp_out
    Extended XYZ file          extxyz
    NWChem input file          nw
    Materials Studio file      xsd
    =========================  =============

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given.
    
    """
    if isinstance(filename, str) and (
        '.json@' in filename or
        '.db@' in filename or
        filename.startswith('pg://') and '@' in filename):
        filename, index = filename.rsplit('@', 1)
        if index.isdigit():
            index = int(index)
    else:
        if isinstance(filename, str):
            p = filename.rfind('@')
            if p != -1:
                try:
                    index = string2index(filename[p + 1:])
                except ValueError:
                    pass
                else:
                    filename = filename[:p]

        if isinstance(index, str):
            index = string2index(index)

    if format is None:
        format = filetype(filename)

    if format.startswith('gpw'):
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree

        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None

        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointDFTCalculator(atoms, energy=energy,
                                              forces=forces, magmoms=magmoms)
        kpts = []
        if r.has_array('IBZKPoints'):
            for w, kpt, eps_n, f_n in zip(r.get('IBZKPointWeights'),
                                          r.get('IBZKPoints'),
                                          r.get('Eigenvalues'),
                                          r.get('OccupationNumbers')):
                kpts.append(SinglePointKPoint(w, kpt[0], kpt[1],
                                              eps_n[0], f_n[0]))
        atoms.calc.kpts = kpts

        return atoms

    if format in ['json', 'db', 'postgresql']:
        if index == slice(None, None):
            index = None
        from ase.db.core import connect
        images = [row.toatoms()
                  for row in connect(filename, format).select(index)]
        if len(images) == 1:
            return images[0]
        else:
            return images

    if index is None:
        index = -1
        
    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)

    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)

    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)

    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)

    if format in ['xyz', 'extxyz']:
        from ase.io.extxyz import read_xyz
        return read_xyz(filename, index)

    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)

    if format == 'trj':
        from ase.io.pickletrajectory import read_trajectory
        return read_trajectory(filename, index)

    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)

    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)

    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)

    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)

    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)

    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)

    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)

    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)

    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)

    if format == 'vasp_xdatcar':
        from ase.io.vasp import read_vasp_xdatcar
        return read_vasp_xdatcar(filename, index)

    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)

    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)

    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)

    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)

    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)

    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)

    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)

    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)

    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)

    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)

    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)

    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)

    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)

    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)

    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)

    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)

    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)

    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)

    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)

    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()

    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)

    if format == 'cmr':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)

    if format == 'lammps':
        from ase.io.lammpsrun import read_lammps_dump
        return read_lammps_dump(filename, index)

    if format == 'eon':
        from ase.io.eon import read_reactant_con
        return read_reactant_con(filename)

    if format == 'gromacs':
        from ase.io.gromacs import read_gromacs
        return read_gromacs(filename)

    if format == 'gaussian':
        from ase.io.gaussian import read_gaussian
        return read_gaussian(filename)

    if format == 'gaussian_out':
        from ase.io.gaussian import read_gaussian_out
        return read_gaussian_out(filename, index)

    if format == 'esp_in':
        from ase.io.espresso import read_espresso_in
        return read_espresso_in(filename)

    if format == 'esp_out':
        from ase.io.espresso import read_espresso_out
        return read_espresso_out(filename, index)

    if format == 'nw':
        from ase.io.nwchem import read_nwchem_input
        return read_nwchem_input(filename)

    if format == 'xsd':
        from ase.io.xsd import read_xsd
        return read_xsd(filename)

    raise RuntimeError('File format descriptor ' + format + ' not recognized!')
Example #16
def read(filename, index=None, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    EON reactant.con file      eon
    Gromacs coordinates        gro
    Gaussian com (input) file  gaussian
    Gaussian output file       gaussian_out
    Quantum espresso in file   esp_in
    Quantum espresso out file  esp_out
    Extended XYZ file          extxyz
    NWChem input file          nw
    =========================  =============

    """
    if isinstance(filename,
                  str) and ('.json@' in filename or '.db@' in filename or
                            filename.startswith('pg://') and '@' in filename):
        filename, index = filename.rsplit('@', 1)
        if index.isdigit():
            index = int(index)
    else:
        if isinstance(filename, str):
            p = filename.rfind('@')
            if p != -1:
                try:
                    index = string2index(filename[p + 1:])
                except ValueError:
                    pass
                else:
                    filename = filename[:p]

        if isinstance(index, str):
            index = string2index(index)

    if format is None:
        format = filetype(filename)

    if format.startswith('gpw'):
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree

        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None

        atoms = Atoms(positions=positions, numbers=numbers, cell=cell, pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointDFTCalculator(atoms,
                                              energy=energy,
                                              forces=forces,
                                              magmoms=magmoms)
        kpts = []
        if r.has_array('IBZKPoints'):
            for w, kpt, eps_n, f_n in zip(r.get('IBZKPointWeights'),
                                          r.get('IBZKPoints'),
                                          r.get('Eigenvalues'),
                                          r.get('OccupationNumbers')):
                kpts.append(
                    SinglePointKPoint(w, kpt[0], kpt[1], eps_n[0], f_n[0]))
        atoms.calc.kpts = kpts

        return atoms

    if format in ['json', 'db', 'postgresql']:
        from ase.db.core import connect, dict2atoms
        if index == slice(None, None):
            index = None
        images = [
            dict2atoms(d) for d in connect(filename, format).select(index)
        ]
        if len(images) == 1:
            return images[0]
        else:
            return images

    if index is None:
        index = -1

    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)

    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)

    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)

    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)

    if format in ['xyz', 'extxyz']:
        from ase.io.extxyz import read_xyz
        return read_xyz(filename, index)

    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)

    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)

    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)

    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)

    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)

    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)

    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)

    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)

    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)

    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)

    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)

    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)

    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)

    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)

    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)

    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)

    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)

    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)

    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)

    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)

    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)

    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)

    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)

    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)

    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)

    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)

    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)

    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)

    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)

    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()

    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)

    if format == 'cmr':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)

    if format == 'lammps':
        from ase.io.lammpsrun import read_lammps_dump
        return read_lammps_dump(filename, index)

    if format == 'eon':
        from ase.io.eon import read_reactant_con
        return read_reactant_con(filename)

    if format == 'gromacs':
        from ase.io.gromacs import read_gromacs
        return read_gromacs(filename)

    if format == 'gaussian':
        from ase.io.gaussian import read_gaussian
        return read_gaussian(filename)

    if format == 'gaussian_out':
        from ase.io.gaussian import read_gaussian_out
        return read_gaussian_out(filename, index)

    if format == 'esp_in':
        from ase.io.espresso import read_espresso_in
        return read_espresso_in(filename)

    if format == 'esp_out':
        from ase.io.espresso import read_espresso_out
        return read_espresso_out(filename, index)

    if format == 'nw':
        from ase.io.nwchem import read_nwchem_input
        return read_nwchem_input(filename)

    raise RuntimeError('File format descriptor ' + format + ' not recognized!')
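
The dispatcher above looks like (an older version of) ASE's ase.io.read, which selects a reader from the format string and returns an Atoms object, or a list of images for multi-frame formats. A minimal usage sketch under that assumption, with placeholder file names:

# Hedged sketch -- assumes the dispatcher is exposed as ase.io.read with the
# (filename, index=-1, format=None) signature implied above; file names are
# placeholders.
from ase.io import read

final = read("POSCAR", format="vasp")             # single final geometry
frames = read("run.traj", index=slice(0, None))   # every frame of a trajectory
surface = read("output3.gen", format="gen")       # DFTB+ .gen geometry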
Example #17
0
def main(
        numsofar,  # run number you are seeding for
        batch,  # current batch number
        velo,  # velocity of the incident Ar in Å/ps
        datadir="temp/",  # input files, laid out as datadir/output$i-$j.gen and datadir/velos$i-$j
        outputdir="temp.new/",  # directory for the new input files
        hbondrange=6,  # buffer above the slab top used when flagging desorbed fragments
        zmincutoff=0.1,  # somewhat arbitrary cutoff to drop fragments that have gone into the bulk
        numperbatch=17,
        numbatches=10):
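    """
    Prepare inputs for the next batch of runs (docstring added for clarity,
    inferred from the code below): read final geometries (output*.gen) and
    velocity files (velos*) from datadir, build a bond graph for each geometry,
    drop fragments that have desorbed above the slab or fallen below
    zmincutoff as well as atoms that wrapped through the top of the cell,
    deposit a new incident Ar atom at a random in-plane position with velocity
    (0, 0, -velo), then write the trimmed geometries and velocities to
    outputdir and a per-species tally of removed atoms to removedspecies.csv.
    """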

    numsofar = int(numsofar)
    batch = int(batch)
    velo = float(velo)

    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)
    numperbatch = int(numperbatch)
    numbatches = int(numbatches)

    ##############################
    ### Read in geometry files ###
    ##############################

    geometries = {}
    for i in os.listdir(datadir):
        if "output" in i:
            key = re.search(r"\d+", i)
            if key:
                key = key.group(0)
                geometries[key] = gen.read_gen(datadir + i)

    ##########################
    ### Read in velocities ###
    ##########################
    velos = dict()
    for i in os.listdir(datadir):
        if "velos" in i:
            key = re.search(r"\d+", i)
            if key:
                key = key.group(0)
                velos[key] = pd.read_csv(datadir + i,
                                         header=None,
                                         dtype=float,
                                         sep=r"\s+")

    # Reproduce the RNG state of the first numsofar sets of runs (numsofar can
    # also be read as the index of the run being seeded for): fix the seed and
    # burn through the random draws consumed by the earlier batches, discarding
    # the values.
    np.random.seed(429)
    for b in range(batch + numsofar * numbatches):
        for i in range(numperbatch):
            x_rand, y_rand, z_rand = np.append(np.random.random(size=2), 0)

    ################
    ### trimming ###
    ################

    trimmedgeoms = dict()
    trimmedvelos = dict()

    removedspecies = dict()

    for key, geom in geometries.items():
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F': 0, 'C': 0}

        # construct graph
        adjmat = Analysis(geom).adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i, j]:
                    g.addEdge(i, j)
        cc = g.connectedComponents()
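        # each connected component of the bond graph is one bonded cluster;
        # the component containing atom index 0 is treated as the slab, and
        # the remaining components are detached fragments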

        #identify slab, and max height of slab
        maingraph = np.array([i for i in cc if 0 in i][0])
        slab = geom[[atom.index for atom in geom if atom.index in maingraph]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange

        # isolate fragments and identify which to remove
        fragGraphs = [i for i in cc if 0 not in i]
        fragZs = [[geom[i].position[2] for i in frag] for frag in fragGraphs]
        removeFrag = [
            np.all(np.array(i) > zcutoff) or np.all(np.array(i) < zmincutoff)
            for i in fragZs
        ]
        atomsToRemove = [
            i for g, r in zip(fragGraphs, removeFrag) if r for i in g
        ]
        # account for any atoms that have wrapped around through the top of the cell (looking at you, H)
        atomsToRemove += [a.index for a in geom if a.z > geom.cell[2, 2]]
        for idx in atomsToRemove:
            # tally removed atoms by species
            removedatoms[geom[idx].symbol] += 1

        geomcopy = geom.copy()
        del geomcopy[[
            atom.index for atom in geomcopy if atom.index in atomsToRemove
        ]]

        x_rand, y_rand, z_rand = geomcopy.cell.cartesian_positions(
            np.append(np.random.random(size=2), 0))

        add_adsorbate(geomcopy,
                      adsorbate='Ar',
                      height=7,
                      position=(x_rand, y_rand))

        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy
        trimmedvelos[key] = velos[key][[
            i not in atomsToRemove for i in np.arange(len(velos[key]))
        ]]
        # append the velocity row for the newly added incident Ar atom
        # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
        trimmedvelos[key] = pd.concat(
            [trimmedvelos[key], pd.DataFrame([[0, 0, -velo]])],
            ignore_index=True)

    # collect all removed species series into a df and write as csv
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")

    #write
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), geom)
    for key, v in trimmedvelos.items():
        v.to_csv("%svelos%s.in" % (outputdir, key),
                 sep=" ",
                 index=False,
                 header=False)
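
A hypothetical command-line entry point (not part of the original example): because main converts its arguments with int() and float() itself, the raw sys.argv strings can be passed straight through, which is convenient when the script is launched from a batch job.

# Hypothetical driver (assumption, not from the source); positional arguments
# follow main(numsofar, batch, velo, ...) above.
if __name__ == "__main__":
    import sys
    main(*sys.argv[1:])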