def getslabs(data, directory, useInputs=False):
    """Extract slabs from a results table, write them out and attach them.

    For every structure in the selected source column the slab is extracted
    with ``getslab`` and written to ``<directory>slab<key>.gen``.  The files
    are then read back for every label in ``data.index`` and stored in a new
    column of ``data``.

    Args:
        data: DataFrame from the readData function (utils.py), or any
            DataFrame with ``('struct', 'in')`` and ``('struct', 'out')``
            columns.  It is modified in place.
        directory: Prefix for the slab files.  It is concatenated directly
            with the file name, so it should end with a path separator.
        useInputs: If True, slabs come from ``('struct', 'in')`` and are
            stored in ``('struct', 'inslab')``; otherwise ``('struct', 'out')``
            is used and the result goes to ``('struct', 'outslab')``.

    Returns:
        None
    """
    column = 'in' if useInputs else 'out'
    slabSource = data['struct'][column]

    # generate the slabs; Series.items() replaces iteritems(), which was
    # removed in pandas 2.0
    slabs = {key: getslab(value) for key, value in slabSource.items()}
    for key, value in slabs.items():
        gen.write_gen(directory + "slab{}.gen".format(key), value)

    # read the slabs back from disk; one file is expected per index label
    # NOTE(review): presumably done so the stored slabs match the written
    # files exactly -- confirm before removing the round trip
    for key in data.index:
        slabs[key] = gen.read_gen(directory + "slab{}.gen".format(key))

    data.loc[:, ('struct', column + 'slab')] = pd.Series(slabs)
def randomGrid(adsorbate, slab, h=2, outputDir="tempout/", numDirs=10,
               runsPerDir=17, shallow=False, kind='gen'):
    """Write copies of a slab, each with one randomly placed adsorbate.

    ``numDirs * runsPerDir`` structures are produced.  The global numpy RNG
    is re-seeded with a fixed value on every call, so the same placements
    are generated each time.

    Args:
        adsorbate: Atoms obj of adsorbate
        slab: Atoms obj of slab (copied for every structure)
        h: height of adsorbate, passed as ``height`` to ``add_adsorbate``
        outputDir: Path prefix (str) for the output files; concatenated
            directly with the file name.
        numDirs: Number of batches. Defaults to 10.
        runsPerDir: Number of sims per batch. Defaults to 17.
        shallow: If True, files are numbered consecutively
            (``input<k>.gen`` / ``POSCAR<k>``); otherwise they are named by
            batch and run (``input<d>-<run>.gen`` / ``POSCAR<d>-<run>``).
            Defaults to False.
        kind: output desired, from (gen, vasp). Defaults to gen.

    Returns:
        None

    Raises:
        AssertionError: if ``kind`` is neither 'gen' nor 'vasp' (raised when
            the first structure is about to be written).
    """
    np.random.seed(429)

    for batch in range(numDirs):
        for sim in range(runsPerDir):
            structure = slab.copy()

            # three fractional coordinates are drawn because the cell
            # conversion needs a full vector; only x and y are used
            fractional = np.random.random(3)
            xy = structure.cell.cartesian_positions(fractional)[:2]
            add_adsorbate(structure, adsorbate, height=h, position=xy)

            if kind == 'gen':
                if shallow:
                    outname = "input{}.gen".format(batch * runsPerDir + sim)
                else:
                    outname = "input{}-{}.gen".format(batch, sim)
                gen.write_gen(outputDir + outname, structure)
            elif kind == 'vasp':
                if shallow:
                    outname = "POSCAR{}".format(batch * runsPerDir + sim)
                else:
                    outname = "POSCAR{}-{}".format(batch, sim)
                vasp.write_vasp(outputDir + outname, structure,
                                sort=True, vasp5=True)
            else:
                raise AssertionError("kind should be from (vasp, gen)")
def write(filename, images, format=None, **kwargs):
    """Write Atoms object(s) to file.

    filename: str
        Name of the file to write to.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.

    The accepted output formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    CUBE file                  cube
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    ABINIT input file          abinit
    Protein Data Bank          pdb
    CIF-file                   cif
    XCrySDen Structure File    xsf
    FHI-aims geometry file     aims
    gOpenMol .plt file         plt
    Python script              py
    Encapsulated Postscript    eps
    Portable Network Graphics  png
    Persistence of Vision      pov
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    exciting                   exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    CASTEP cell file           cell
    DftbPlus input file        dftb
    ETSF                       etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    EON reactant.con file      eon
    Gromacs coordinates        gro
    GROMOS96 (only positions)  g96
    X3D                        x3d
    X3DOM HTML                 html
    Extended XYZ file          extxyz
    =========================  ===========

    Format detection works on the file name as a string (``'-'`` selects
    ``xyz`` on standard output), so ``format`` has to be given explicitly
    when ``filename`` is not a string.

    The use of additional keywords is format specific.

    The ``cube`` and ``plt`` formats accept (plt requires it) a ``data``
    keyword, which can be used to write a 3D array to the file along
    with the nuclei coordinates.

    The ``vti``, ``vts`` and ``vtu`` formats are all specifically directed
    for use with MayaVi, and the latter is designated for visualization of
    the atoms whereas the two others are intended for volume data. Further,
    it should be noted that the ``vti`` format is intended for orthogonal
    unit cells as only the grid-spacing is stored, whereas the ``vts`` format
    additionally stores the coordinates of each grid point, thus making it
    useful for volume data in more general unit cells.

    The ``eps``, ``png``, and ``pov`` formats are all graphics formats,
    and accept the additional keywords:

    rotation: str (default '')
      The rotation angles, e.g. '45x,70y,90z'.

    show_unit_cell: int (default 0)
      Can be 0, 1, 2 to either not show, show, or show all of the unit cell.

    radii: array or float (default 1.0)
      An array of same length as the list of atoms indicating the sphere
      radii. A single float specifies a uniform scaling of the default
      covalent radii.

    bbox: 4 floats (default None)
      Set the bounding box to (xll, yll, xur, yur) (lower left, upper right).

    colors: array (default None)
      An array of same length as the list of atoms, indicating the rgb color
      code for each atom. Default is the jmol_colors of ase/data/colors.

    scale: int (default 20)
      Number of pixels per Angstrom.

    For the ``pov`` graphics format, ``scale`` should not be specified.
    The elements of the color array can additionally be strings, or 4
    and 5 vectors for named colors, rgb + filter, and rgb + filter + transmit
    specification. This format accepts the additional keywords:

    ``run_povray``, ``display``, ``pause``, ``transparent``,
    ``canvas_width``, ``canvas_height``, ``camera_dist``,
    ``image_plane``, ``camera_type``, ``point_lights``,
    ``area_light``, ``background``, ``textures``, ``celllinewidth``,
    ``bondlinewidth``, ``bondatoms``

    The ``xyz`` format accepts a comment string using the ``comment`` keyword:

    comment: str (default '')
      Optional comment written on the second line of the file.

    Raises TypeError('Unknown format: ...') when no ``ase.io`` module can be
    imported for the requested format.
    """

    if format is None:
        # guess the format from the file name
        if filename == '-':
            format = 'xyz'
            filename = sys.stdout
        elif 'POSCAR' in filename or 'CONTCAR' in filename:
            format = 'vasp'
        elif 'OUTCAR' in filename:
            format = 'vasp_out'
        elif filename.endswith('etsf.nc'):
            format = 'etsf'
        elif filename.lower().endswith('.con'):
            format = 'eon'
        elif os.path.basename(filename) == 'coord':
            format = 'tmol'
        else:
            # fall back to the suffix; only 'cell' needs translating here,
            # the remaining aliases are resolved further down
            suffix = filename.split('.')[-1]
            format = {'cell': 'castep_cell'}.get(suffix, suffix)

    # formats whose writer is not simply ase.io.<format>.write_<format>
    if format in ['json', 'db']:
        from ase.db import connect
        connect(filename, format).write(images)
        return
    if format == 'castep_cell':
        from ase.io.castep import write_cell
        write_cell(filename, images, **kwargs)
        return
    if format == 'exi':
        from ase.io.exciting import write_exciting
        write_exciting(filename, images)
        return
    if format == 'cif':
        # Write exactly once and stop.  This branch used to fall through to
        # the generic writer below, which wrote the structure a second time
        # (and was the only call that received the keyword arguments).
        from ase.io.cif import write_cif
        write_cif(filename, images, **kwargs)
        return
    if format == 'xyz':
        from ase.io.extxyz import write_xyz
        write_xyz(filename, images, columns=['symbols', 'positions'],
                  write_info=False, **kwargs)
        return
    if format == 'extxyz':
        from ase.io.extxyz import write_xyz
        write_xyz(filename, images, **kwargs)
        return
    if format == 'gen':
        from ase.io.gen import write_gen
        write_gen(filename, images)
        return
    if format == 'tmol':
        from ase.io.turbomole import write_turbomole
        write_turbomole(filename, images)
        return
    if format == 'dftb':
        from ase.io.dftb import write_dftb
        write_dftb(filename, images)
        return
    if format == 'struct':
        from ase.io.wien2k import write_struct
        write_struct(filename, images, **kwargs)
        return
    if format == 'findsym':
        from ase.io.findsym import write_findsym
        write_findsym(filename, images)
        return
    if format == 'etsf':
        from ase.io.etsf import ETSFWriter
        writer = ETSFWriter(filename)
        try:
            # only the first image is written
            if not isinstance(images, (list, tuple)):
                images = [images]
            writer.write_atoms(images[0])
        finally:
            writer.close()
        return
    if format == 'cmr':
        from ase.io.cmr_io import write_db
        return write_db(filename, images, **kwargs)
    if format == 'eon':
        from ase.io.eon import write_reactant_con
        write_reactant_con(filename, images)
        return
    if format == 'gro':
        from ase.io.gromacs import write_gromacs
        write_gromacs(filename, images)
        return
    if format == 'g96':
        from ase.io.gromos import write_gromos
        write_gromos(filename, images)
        return
    if format == 'html':
        from ase.io.x3d import write_html
        write_html(filename, images)
        return

    # generic case: ase.io.<format>.write_<format>, after resolving aliases
    format = {'in': 'aims',
              'traj': 'trajectory',
              'nc': 'netcdf',
              'bundle': 'bundletrajectory'}.get(format, format)
    name = 'write_' + format

    # the three VTK writers keep their own function name but share a module
    if format in ['vti', 'vts', 'vtu']:
        format = 'vtkxml'

    try:
        writer_function = getattr(
            __import__('ase.io.%s' % format, {}, {}, [name]), name)
    except ImportError:
        raise TypeError('Unknown format: "%s".' % format)

    writer_function(filename, images, **kwargs)
def write(filename, images, format=None, **kwargs):
    """Write Atoms object(s) to file.

    filename: str
        Name of the file to write to.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.

    The accepted output formats:

    =========================  ===========
    format                     short name
    =========================  ===========
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    CUBE file                  cube
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    ABINIT input file          abinit
    Protein Data Bank          pdb
    CIF-file                   cif
    XCrySDen Structure File    xsf
    FHI-aims geometry file     aims
    gOpenMol .plt file         plt
    Python script              py
    Encapsulated Postscript    eps
    Portable Network Graphics  png
    Persistence of Vision      pov
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    exciting                   exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    CASTEP cell file           cell
    DftbPlus input file        dftb
    ETSF                       etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    EON reactant.con file      eon
    Gromacs coordinates        gro
    GROMOS96 (only positions)  g96
    X3D                        x3d
    X3DOM HTML                 html
    Extended XYZ file          extxyz
    =========================  ===========

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given.

    The use of additional keywords is format specific.

    The ``cube`` and ``plt`` formats accept (plt requires it) a ``data``
    keyword, which can be used to write a 3D array to the file along
    with the nuclei coordinates.

    The ``vti``, ``vts`` and ``vtu`` formats are all specifically directed
    for use with MayaVi, and the latter is designated for visualization of
    the atoms whereas the two others are intended for volume data. Further,
    it should be noted that the ``vti`` format is intended for orthogonal
    unit cells as only the grid-spacing is stored, whereas the ``vts`` format
    additionally stores the coordinates of each grid point, thus making it
    useful for volume data in more general unit cells.

    The ``eps``, ``png``, and ``pov`` formats are all graphics formats,
    and accept the additional keywords:

    rotation: str (default '')
      The rotation angles, e.g. '45x,70y,90z'.

    show_unit_cell: int (default 0)
      Can be 0, 1, 2 to either not show, show, or show all of the unit cell.

    radii: array or float (default 1.0)
      An array of same length as the list of atoms indicating the sphere
      radii. A single float specifies a uniform scaling of the default
      covalent radii.

    bbox: 4 floats (default None)
      Set the bounding box to (xll, yll, xur, yur) (lower left, upper right).

    colors: array (default None)
      An array of same length as the list of atoms, indicating the rgb color
      code for each atom. Default is the jmol_colors of ase/data/colors.

    scale: int (default 20)
      Number of pixels per Angstrom.

    For the ``pov`` graphics format, ``scale`` should not be specified.
    The elements of the color array can additionally be strings, or 4
    and 5 vectors for named colors, rgb + filter, and rgb + filter + transmit
    specification. This format accepts the additional keywords:

    ``run_povray``, ``display``, ``pause``, ``transparent``,
    ``canvas_width``, ``canvas_height``, ``camera_dist``,
    ``image_plane``, ``camera_type``, ``point_lights``,
    ``area_light``, ``background``, ``textures``, ``celllinewidth``,
    ``bondlinewidth``, ``bondatoms``

    The ``xyz`` format accepts a comment string using the ``comment`` keyword:

    comment: str (default '')
      Optional comment written on the second line of the file.

    Raises TypeError('Unknown format: ...') when no ``ase.io`` module can be
    imported for the requested format.
    """

    if format is None:
        # guess the format from the file name
        if filename == '-':
            format = 'xyz'
            filename = sys.stdout
        elif 'POSCAR' in filename or 'CONTCAR' in filename:
            format = 'vasp'
        elif 'OUTCAR' in filename:
            format = 'vasp_out'
        elif filename.endswith('etsf.nc'):
            format = 'etsf'
        elif filename.lower().endswith('.con'):
            format = 'eon'
        elif os.path.basename(filename) == 'coord':
            format = 'tmol'
        else:
            # fall back to the suffix; only 'cell' needs translating here,
            # the remaining aliases are resolved further down
            suffix = filename.split('.')[-1]
            format = {'cell': 'castep_cell'}.get(suffix, suffix)

    # formats whose writer is not simply ase.io.<format>.write_<format>
    if format in ['json', 'db']:
        from ase.db import connect
        connect(filename, format).write(images)
        return
    if format == 'castep_cell':
        from ase.io.castep import write_cell
        write_cell(filename, images, **kwargs)
        return
    if format == 'exi':
        from ase.io.exciting import write_exciting
        write_exciting(filename, images)
        return
    if format == 'cif':
        # Write exactly once and stop.  This branch used to fall through to
        # the generic writer below, which wrote the structure a second time
        # (and was the only call that received the keyword arguments).
        from ase.io.cif import write_cif
        write_cif(filename, images, **kwargs)
        return
    if format == 'xyz':
        from ase.io.extxyz import write_xyz
        write_xyz(filename, images, columns=['symbols', 'positions'],
                  write_info=False, write_results=False, **kwargs)
        return
    if format == 'extxyz':
        from ase.io.extxyz import write_xyz
        write_xyz(filename, images, **kwargs)
        return
    if format == 'gen':
        from ase.io.gen import write_gen
        write_gen(filename, images)
        return
    if format == 'tmol':
        from ase.io.turbomole import write_turbomole
        write_turbomole(filename, images)
        return
    if format == 'dftb':
        from ase.io.dftb import write_dftb
        write_dftb(filename, images)
        return
    if format == 'struct':
        from ase.io.wien2k import write_struct
        write_struct(filename, images, **kwargs)
        return
    if format == 'findsym':
        from ase.io.findsym import write_findsym
        write_findsym(filename, images)
        return
    if format == 'etsf':
        from ase.io.etsf import ETSFWriter
        writer = ETSFWriter(filename)
        try:
            # only the first image is written
            if not isinstance(images, (list, tuple)):
                images = [images]
            writer.write_atoms(images[0])
        finally:
            writer.close()
        return
    if format == 'cmr':
        from ase.io.cmr_io import write_db
        return write_db(filename, images, **kwargs)
    if format == 'eon':
        from ase.io.eon import write_reactant_con
        write_reactant_con(filename, images)
        return
    if format == 'gro':
        from ase.io.gromacs import write_gromacs
        write_gromacs(filename, images)
        return
    if format == 'g96':
        from ase.io.gromos import write_gromos
        write_gromos(filename, images)
        return
    if format == 'html':
        from ase.io.x3d import write_html
        write_html(filename, images)
        return

    # generic case: ase.io.<format>.write_<format>, after resolving aliases
    format = {'in': 'aims',
              'traj': 'trajectory',
              'nc': 'netcdf',
              'bundle': 'bundletrajectory'}.get(format, format)
    name = 'write_' + format

    if format in ['vti', 'vts', 'vtu']:
        # the three VTK writers keep their own function name but share a
        # module
        format = 'vtkxml'
    elif format == 'trj':
        # 'trj' is served by ase.io.pickletrajectory.write_trajectory
        name = 'write_trajectory'
        format = 'pickletrajectory'

    try:
        writer_function = getattr(
            __import__('ase.io.%s' % format, {}, {}, [name]), name)
    except ImportError:
        raise TypeError('Unknown format: "%s".' % format)

    writer_function(filename, images, **kwargs)
def main(
        datadir="temp/",  # data files, structured as datadir/output$i-$j.gen and datadir/velos$i-$j
        outputdir="temp.new/",  # files for output
        hbondrange=6,  # offset from surface corresponding to Hbond range
        zmincutoff=0.1,  # somewhat arbitrary value to get rid of atoms that have gone into bulk
        output_geom_name="output",  # prefix for output geometry files
        output_velos_name="velos"  # prefix for output velocity files
):
    """Trim detached fragments from simulation outputs and write new inputs.

    Every geometry in ``datadir`` is split into bonded fragments.  The
    fragment containing atom 0 is treated as the slab and written to
    ``<outputdir>slab<key>.gen``.  Other fragments are removed when all of
    their atoms lie more than ``hbondrange`` above the highest slab atom or
    all lie below ``zmincutoff``; any atom whose z exceeds the cell height
    is removed as well.  The trimmed geometries go to
    ``<outputdir>input<key>.gen`` and the matching velocity rows to
    ``<outputdir><output_velos_name><key>.in``.  A per-species count of the
    removed atoms is written to ``removedspecies.csv`` in the current
    working directory.

    Args:
        datadir: Prefix of the directory holding the data files; it is
            concatenated directly with the file names.
        outputdir: Prefix for all output files except ``removedspecies.csv``.
        hbondrange: Offset above the slab beyond which fragments are
            dropped (converted with ``int``).
        zmincutoff: z below which a fragment counts as lost in the bulk
            (converted with ``float``).
        output_geom_name: Substring identifying geometry files.
        output_velos_name: Substring identifying velocity files; also the
            prefix of the written velocity files.

    Returns:
        None

    Raises:
        KeyError: if a geometry has no velocity file with the same key.
    """
    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)

    ##############################
    ### Read in geometry files ###
    ##############################
    # NOTE(review): the key is the FIRST run of digits in the file name, so
    # "output1-2.gen" and "output1-3.gen" would share key "1" and the later
    # file would replace the earlier one -- confirm the naming actually used.
    geometries = {}
    for fname in os.listdir(datadir):
        if output_geom_name in fname:
            match = re.search(r"\d+", fname)
            if match:
                geometries[match.group(0)] = gen.read_gen(datadir + fname)

    ##########################
    ### Read in velocities ###
    ##########################
    velos = {}
    for fname in os.listdir(datadir):
        if output_velos_name in fname:
            match = re.search(r"\d+", fname)
            if match:
                velos[match.group(0)] = pd.read_csv(
                    datadir + fname, header=None, dtype=float, sep=r"\s+")

    ################
    ### trimming ###
    ################
    trimmedgeoms = {}
    trimmedvelos = {}
    removedspecies = {}

    for key, geom in geometries.items():
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F': 0, 'C': 0}

        # construct the bond graph from the adjacency matrix
        adjmat = Analysis(geom).adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i, j]:
                    g.addEdge(i, j)
        cc = g.connectedComponents()

        # identify slab (the fragment holding atom 0) and its max height
        slab_indices = set([c for c in cc if 0 in c][0])
        slab = geom[[atom.index for atom in geom
                     if atom.index in slab_indices]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange

        # isolate fragments and identify which to remove; a set is used so
        # that an atom selected by more than one rule is counted only once
        to_remove = set()
        for frag in (c for c in cc if 0 not in c):
            frag_z = np.array([geom[i].position[2] for i in frag])
            if np.all(frag_z > zcutoff) or np.all(frag_z < zmincutoff):
                to_remove.update(frag)

        # account for any atoms that have wrapped around through the top of
        # the cell (lookin at you, H)
        to_remove.update(a.index for a in geom if a.z > geom.cell[2, 2])

        # tally removed atoms by species; species missing from the preset
        # table are added instead of raising KeyError
        for idx in sorted(to_remove):
            symbol = geom[idx].symbol
            removedatoms[symbol] = removedatoms.get(symbol, 0) + 1

        geomcopy = geom.copy()
        del geomcopy[[atom.index for atom in geomcopy
                      if atom.index in to_remove]]

        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy

        # keep only the velocity rows of the surviving atoms (row i of the
        # velocity table is taken to belong to atom i)
        velocities = velos[key]
        keep_rows = [i not in to_remove for i in range(len(velocities))]
        trimmedvelos[key] = velocities[keep_rows]

    # collect all removed species series into a df and write as csv
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")

    # write
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), geom)
    for key, v in trimmedvelos.items():
        v.to_csv("%s%s%s.in" % (outputdir, output_velos_name, key),
                 sep=" ", index=False, header=False)
def _report_elapsed(label, since):
    """Print ``label`` and the seconds elapsed since ``since``; return now."""
    print(label)
    now = time.time()
    print(now - since)
    return now


def main(cutoff, datapath, zmodelPath, EmodelPath, smallset, adsorbate='mef',
         npoints=20, outputpath='input.gen'):
    """Place adsorbates on a slab at the lowest predicted-energy grid sites.

    The structure in ``datapath`` is reduced to its slab, an
    ``npoints`` x ``npoints`` grid of He probe atoms is generated over it
    (probe height from ``zmodel``), and ``Emodel`` predicts an energy for
    every probe from its SOAP representation.  Grid sites are then visited
    in order of increasing predicted energy and accepted when they are at
    least ``cutoff`` away (in x/y, including neighbouring periodic images)
    from every previously accepted site.  One adsorbate is added per
    accepted site and the result is written to ``outputpath``.  Progress and
    timings are printed, and ``view`` is called on intermediate and final
    structures.

    Args:
        cutoff: Minimum x/y distance between accepted sites (converted with
            ``float``).
        datapath: Structure file; read as GEN if the path contains "gen",
            as VASP if it contains "CAR".
        zmodelPath: Pickle file of the model handed to ``predictz``.
        EmodelPath: Pickle file of the energy model (``predict`` is called).
        smallset: "0"/"1"-like flag; selects the reduced species list.
        adsorbate: 'mef' or 'cf4'.
        npoints: Grid points per direction (converted with ``int``).
        outputpath: GEN file to write.

    Returns:
        None

    Raises:
        KeyError: if ``adsorbate`` is not a known adsorbate name.
        ValueError: if ``datapath`` names neither a GEN nor a VASP file.
    """
    last = time.time()
    npoints = int(npoints)
    cutoff = float(cutoff)
    smallset = bool(int(smallset))

    adsorbate_types = {'mef': mef, 'cf4': cf4}
    ads = adsorbate_types[adsorbate]

    # NOTE(review): pickle.load can execute arbitrary code; only point these
    # paths at model files from a trusted source.
    with open(zmodelPath, 'rb') as f:
        zmodel = pickle.load(f)
    with open(EmodelPath, 'rb') as f:
        Emodel = pickle.load(f)

    # read in calculated structure
    if "gen" in datapath:
        data = gen.read_gen(datapath)
    elif "CAR" in datapath:
        data = vasp.read_vasp(datapath)
    else:
        # without this branch ``data`` stayed unbound and the next line
        # failed with an unrelated-looking UnboundLocalError
        raise ValueError(
            "datapath should contain 'gen' or 'CAR': %r" % (datapath,))
    print('maxz: ', max([i.position[2] for i in data]))
    last = _report_elapsed('data read', last)

    # obtain base slab
    base = getslab(data)
    # assume any adsorption influence enters via config, independent of Ar
    del base[[atom.index for atom in base if atom.symbol in ['He', 'Ar']]]
    base.wrap()
    last = _report_elapsed('base obtained', last)

    # set up gridpoints with predicted z heights
    # NOTE(review): grid coordinates run along x and y up to the lengths of
    # the first two cell vectors, which looks like it assumes an orthogonal
    # cell -- confirm.
    a_length = np.linalg.norm(base.cell[0])
    b_length = np.linalg.norm(base.cell[1])
    apoints = np.linspace(0, a_length, npoints)
    bpoints = np.linspace(0, b_length, npoints)

    if smallset:
        species = ['Si', 'N', 'H', 'He']
    else:
        species = ["Si", "N", "H", "C", "F", "Ar", "He"]
    print(smallset, species)

    probe_structs = []
    for apoint in apoints:
        for bpoint in bpoints:
            newstruct = base.copy()
            print(newstruct)
            zhat = predictz(newstruct, apoint, bpoint, zmodel, species)
            newstruct.append(Atom('He', position=(apoint, bpoint, zhat)))
            probe_structs.append(newstruct)
    last = _report_elapsed('gridpoints done', last)

    gridpoints = pd.DataFrame({'geom': pd.Series(probe_structs)})

    # add SOAP representation for gridpoint structs
    gridpoints = pd.concat(
        [gridpoints, getSOAPs(gridpoints['geom'], species=species)], axis=1)

    # create prediction matrix
    X = pd.DataFrame(gridpoints['SOAP'].to_list(), index=gridpoints.index)

    # predict energies, append to original df
    gridpoints['predE'] = Emodel.predict(X)

    # create 'visbase': struct with all He points included in one struct;
    # predicted energies are stored as the He atoms' initial charges
    charges = np.append(np.zeros(len(base)), gridpoints['predE'])
    visbase = base.copy()
    for geom in gridpoints['geom']:
        visbase.append(Atom("He", position=geom[-1].position))
    visbase.set_initial_charges(charges)
    view(visbase)
    last = _report_elapsed('energy prediction done', last)

    # assess gridpoints and place adsorbates, lowest predicted energy first
    gridpoints = gridpoints.sort_values(by='predE')
    gridpoints['xpos'] = [geom[-1].position[0] for geom in gridpoints['geom']]
    gridpoints['ypos'] = [geom[-1].position[1] for geom in gridpoints['geom']]
    gridpoints['zpos'] = [geom[-1].position[2] for geom in gridpoints['geom']]

    # x/y displacements to the neighbouring periodic images (and to itself)
    a_xy = visbase.cell[0][:2]
    b_xy = visbase.cell[1][:2]
    a_shifts = [-a_xy, a_xy * 0, a_xy]
    b_shifts = [-b_xy, b_xy * 0, b_xy]

    adsorbatePoints = []
    for _, row in gridpoints.iterrows():
        point1 = np.array([row['xpos'], row['ypos']])
        # any() stops at the first accepted site that is too close
        isclose = any(
            np.linalg.norm(point1 - (np.array([x, y]) + dispx + dispy))
            < cutoff
            for x, y, _z in adsorbatePoints
            for dispx in a_shifts
            for dispy in b_shifts)
        if not isclose:
            adsorbatePoints.append(np.append(point1, row['zpos']))
    last = _report_elapsed('placement done', last)

    adsvisbase = base.copy()
    maxz = np.max([atom.position[2] for atom in adsvisbase])
    for point in adsorbatePoints:
        print(point[2])
        # height is given relative to the top of the bare slab, plus 1
        add_adsorbate(adsvisbase, ads, height=point[2] - maxz + 1,
                      position=(point[0], point[1]))

    gen.write_gen(outputpath, adsvisbase)
    view([data, base, adsvisbase])
def write_dftb(fileobj, images):
    """Write a structure in the GEN format used by DFTB+.

    This is a thin wrapper that hands both arguments to
    ``ase.io.gen.write_gen``; see the DFTB+ manual for the format itself.
    Multiple snapshots are not allowed.
    """
    # imported lazily so the GEN writer is only loaded when it is needed
    from ase.io import gen as gen_io

    gen_io.write_gen(fileobj, images)
def randomGridMultiple(n, adsorbate, slab, h=2, outputDir="tempout/",
                       numDirs=10, runsPerDir=17, shallow=False, kind='gen',
                       minDist=1):
    """Write copies of a slab, each with ``n`` randomly placed adsorbates.

    ``numDirs * runsPerDir`` structures are produced.  Candidate positions
    are drawn one at a time and rejected when they lie closer than
    ``minDist`` (in the x/y plane) to a position already accepted for the
    same structure.  The global numpy RNG is re-seeded with a fixed value on
    every call, so the same placements are generated each time.

    NOTE(review): the rejection loop has no attempt limit and the distance
    test does not look at periodic images -- confirm that ``n`` and
    ``minDist`` are always small enough for the cell.

    Args:
        n: number of adsorbates per slab
        adsorbate: Atoms obj of adsorbate
        slab: Atoms obj of slab (copied for every structure)
        h: height of adsorbate, passed as ``height`` to ``add_adsorbate``
        outputDir: Path prefix (str) for the output files; concatenated
            directly with the file name.
        numDirs: Number of batches. Defaults to 10.
        runsPerDir: Number of sims per batch. Defaults to 17.
        shallow: If True, files are numbered consecutively
            (``input<k>.gen`` / ``POSCAR<k>``); otherwise they are named by
            batch and run (``input<d>-<run>.gen`` / ``POSCAR<d>-<run>``).
            Defaults to False.
        kind: output desired, from (gen, vasp). Defaults to gen.
        minDist: minimum distance between placed adsorbates (in Å)

    Returns:
        None

    Raises:
        AssertionError: if ``kind`` is neither 'gen' nor 'vasp' (raised when
            the first structure is about to be written).
    """
    np.random.seed(429)

    for batch in range(numDirs):
        for sim in range(runsPerDir):
            structure = slab.copy()

            placed = []
            while len(placed) < n:
                # three fractional coordinates are drawn because the cell
                # conversion needs a full vector; z is a dummy value
                candidate = structure.cell.cartesian_positions(
                    np.random.random(3))
                too_close = any(
                    np.sqrt((candidate[0] - other[0])**2
                            + (candidate[1] - other[1])**2) < minDist
                    for other in placed)
                if not too_close:
                    placed.append(candidate)

            # construct and write
            for spot in placed:
                add_adsorbate(structure, adsorbate, height=h,
                              position=spot[:2])

            if kind == 'gen':
                if shallow:
                    outname = "input{}.gen".format(batch * runsPerDir + sim)
                else:
                    outname = "input{}-{}.gen".format(batch, sim)
                gen.write_gen(outputDir + outname, structure)
            elif kind == 'vasp':
                if shallow:
                    outname = "POSCAR{}".format(batch * runsPerDir + sim)
                else:
                    outname = "POSCAR{}-{}".format(batch, sim)
                vasp.write_vasp(outputDir + outname, structure,
                                sort=True, vasp5=True)
            else:
                raise AssertionError("kind should be from (vasp, gen)")
def main(
        numsofar,  # use the run number you're seeding for
        batch,  # current batch number
        velo,  # velocity of incident Ar in Å/ps
        datadir="temp/",  # data files, structured as datadir/output$i-$j.gen and datadir/velos$i-$j
        outputdir="temp.new/",  # files for output
        hbondrange=6,
        zmincutoff=0.1,  # somewhat arbitrary value to get rid of atoms that have gone into bulk
        numperbatch=17,
        numbatches=10):
    """Trim simulation outputs and seed each with a new incident Ar atom.

    Every geometry in ``datadir`` is split into bonded fragments.  The
    fragment containing atom 0 is treated as the slab and written to
    ``<outputdir>slab<key>.gen``.  Other fragments are removed when all of
    their atoms lie more than ``hbondrange`` above the highest slab atom or
    all lie below ``zmincutoff``; any atom whose z exceeds the cell height
    is removed as well.  An Ar atom is then added at a random x/y position
    with ``height=7`` and a velocity row ``(0, 0, -velo)`` is appended for
    it.  Geometries go to ``<outputdir>input<key>.gen``, velocities to
    ``<outputdir>velos<key>.in`` and a per-species count of removed atoms to
    ``removedspecies.csv`` in the current working directory.

    The global numpy RNG is seeded with a fixed value and then advanced by
    two draws for each of ``(batch + numsofar * numbatches) * numperbatch``
    earlier runs before any position is drawn.

    Args:
        numsofar: Number of run sets already seeded (converted with ``int``).
        batch: Current batch number (converted with ``int``).
        velo: Speed of the incident Ar in Å/ps (converted with ``float``).
        datadir: Prefix of the directory holding the data files; it is
            concatenated directly with the file names.
        outputdir: Prefix for all output files except ``removedspecies.csv``.
        hbondrange: Offset above the slab beyond which fragments are
            dropped (converted with ``int``).
        zmincutoff: z below which a fragment counts as lost in the bulk
            (converted with ``float``).
        numperbatch: Runs per batch (converted with ``int``).
        numbatches: Batches per run set (converted with ``int``).

    Returns:
        None

    Raises:
        KeyError: if a geometry has no velocity file with the same key.
    """
    numsofar = int(numsofar)
    batch = int(batch)
    velo = float(velo)
    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)
    numperbatch = int(numperbatch)
    numbatches = int(numbatches)

    ##############################
    ### Read in geometry files ###
    ##############################
    # NOTE(review): the key is the FIRST run of digits in the file name, so
    # "output1-2.gen" and "output1-3.gen" would share key "1" and the later
    # file would replace the earlier one -- confirm the naming actually used.
    geometries = {}
    for fname in os.listdir(datadir):
        if "output" in fname:
            match = re.search(r"\d+", fname)
            if match:
                geometries[match.group(0)] = gen.read_gen(datadir + fname)

    ##########################
    ### Read in velocities ###
    ##########################
    velos = {}
    for fname in os.listdir(datadir):
        if "velos" in fname:
            match = re.search(r"\d+", fname)
            if match:
                velos[match.group(0)] = pd.read_csv(
                    datadir + fname, header=None, dtype=float, sep=r"\s+")

    # to account for seed behavior from first numsofar sets of runs
    # numsofar can also be interpreted as = current run seeding for
    np.random.seed(429)
    for _ in range(batch + numsofar * numbatches):
        for _ in range(numperbatch):
            # discard the two numbers an earlier run would have consumed
            np.random.random(size=2)

    ################
    ### trimming ###
    ################
    trimmedgeoms = {}
    trimmedvelos = {}
    removedspecies = {}

    for key, geom in geometries.items():
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F': 0, 'C': 0}

        # construct the bond graph from the adjacency matrix
        adjmat = Analysis(geom).adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i, j]:
                    g.addEdge(i, j)
        cc = g.connectedComponents()

        # identify slab (the fragment holding atom 0) and its max height
        slab_indices = set([c for c in cc if 0 in c][0])
        slab = geom[[atom.index for atom in geom
                     if atom.index in slab_indices]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange

        # isolate fragments and identify which to remove; a set is used so
        # that an atom selected by more than one rule is counted only once
        to_remove = set()
        for frag in (c for c in cc if 0 not in c):
            frag_z = np.array([geom[i].position[2] for i in frag])
            if np.all(frag_z > zcutoff) or np.all(frag_z < zmincutoff):
                to_remove.update(frag)

        # account for any atoms that have wrapped around through the top of
        # the cell (lookin at you, H)
        to_remove.update(a.index for a in geom if a.z > geom.cell[2, 2])

        # tally removed atoms by species; species missing from the preset
        # table are added instead of raising KeyError
        for idx in sorted(to_remove):
            symbol = geom[idx].symbol
            removedatoms[symbol] = removedatoms.get(symbol, 0) + 1

        geomcopy = geom.copy()
        del geomcopy[[atom.index for atom in geomcopy
                      if atom.index in to_remove]]

        # random x/y for the incident Ar; the third coordinate is unused
        x_rand, y_rand, _ = geomcopy.cell.cartesian_positions(
            np.append(np.random.random(size=2), 0))
        add_adsorbate(geomcopy, adsorbate='Ar', height=7,
                      position=(x_rand, y_rand))

        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy

        # keep only the velocity rows of the surviving atoms (row i of the
        # velocity table is taken to belong to atom i), then add the row
        # for the new Ar.  pd.concat replaces DataFrame.append, which was
        # removed in pandas 2.0.
        velocities = velos[key]
        keep_rows = [i not in to_remove for i in range(len(velocities))]
        ar_velocity = pd.DataFrame([[0.0, 0.0, -velo]])
        trimmedvelos[key] = pd.concat([velocities[keep_rows], ar_velocity],
                                      ignore_index=True)

    # collect all removed species series into a df and write as csv
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")

    # write
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), geom)
    for key, v in trimmedvelos.items():
        v.to_csv("%svelos%s.in" % (outputdir, key),
                 sep=" ", index=False, header=False)