def _ngl_write_structure(elements, positions, cell):
    """
    Turns structure information into a NGLView-readable
    protein-database-formatted string.

    Args:
        elements (numpy.ndarray/list): Element symbol for each atom.
        positions (numpy.ndarray/list): Vector of Cartesian atom positions.
        cell (numpy.ndarray/list): Simulation cell Bravais matrix. May be
            None, in which case a dummy diagonal cell is built from the
            largest coordinate along each axis.

    Returns:
        (str): The PDB-formatted representation of the structure.
    """
    from ase.geometry import cell_to_cellpar, cellpar_to_cell

    if cell is None or any(np.max(cell, axis=0) < 1e-2):
        # Define a dummy cell if it doesn't exist (eg. for clusters)
        max_pos = np.max(positions, axis=0)
        # Avoid a (near) zero-length cell vector for flat/linear structures.
        max_pos[np.abs(max_pos) < 1e-2] = 10
        cell = np.eye(3) * max_pos

    # Round-trip through the cell parameters to obtain the cell in the
    # orientation produced by cellpar_to_cell, then rotate the atoms into it.
    cellpar = cell_to_cellpar(cell)
    exportedcell = cellpar_to_cell(cellpar)
    rotation = np.linalg.solve(cell, exportedcell)
    positions = np.array(positions).dot(rotation)

    # Collect the pieces and join once instead of growing a string with +=.
    chunks = [_ngl_write_cell(*cellpar), "MODEL 1\n"]
    chunks.extend(
        _ngl_write_atom(i, elements[i], *p) for i, p in enumerate(positions)
    )
    chunks.append("ENDMDL \n")
    return "".join(chunks)
def read_mustem(fd):
    r"""Import muSTEM input file.

    Reads cell, atom positions, etc. from a muSTEM xtl file.

    The muSTEM xtl file stores the root mean square (RMS) displacement,
    which is converted to a Debye-Waller factor (in Ų) by:

    .. math::

        B = RMS * 8\pi^2

    """
    check_numpy_version()
    from ase.geometry import cellpar_to_cell

    fd.readline()  # comment line, ignored

    # Unit cell: only the first three values of the line are used.
    cell = cellpar_to_cell([float(tok) for tok in fd.readline().split()[:3]])

    fd.readline()  # beam energy, ignored

    # Number of different atom types in the file.
    n_types = int(fd.readline().strip())

    # One array per atom type; each array has one entry per atom of that type.
    numbers_per_type = []
    positions_per_type = []
    dw_per_type = []
    occ_per_type = []

    for _ in range(n_types):
        fd.readline()  # element label, ignored (atomic number is used)
        header = fd.readline().split()
        count = int(header[0])
        z_number = int(header[1])
        occ = float(header[2])
        dw_factor = float(header[3]) * 8 * np.pi**2

        # The next `count` rows hold the positions of this atom type.
        positions_per_type.append(np.genfromtxt(fname=fd, max_rows=count))
        numbers_per_type.append(np.ones(count, dtype=int) * z_number)
        occ_per_type.append(np.ones(count) * occ)
        dw_per_type.append(np.ones(count) * dw_factor)

    atoms = Atoms(cell=cell, scaled_positions=np.vstack(positions_per_type))
    atoms.set_atomic_numbers(np.hstack(numbers_per_type))
    atoms.set_array('occupancies', np.hstack(occ_per_type))
    atoms.set_array('debye_waller_factors', np.hstack(dw_per_type))
    return atoms
def from_string(data):
    """
    Reads a Res from a string.

    Args:
        data (str): string containing Res data.

    Returns:
        Res object.
    """
    abc = []
    ang = []
    sp = []
    coords = []
    info = dict()
    # Raw string: \w and \s are regex escapes, not string escapes.
    coord_patt = re.compile(r"""(\w+)\s+
                                ([0-9]+)\s+
                                ([0-9\-\.]+)\s+
                                ([0-9\-\.]+)\s+
                                ([0-9\-\.]+)\s+
                                ([0-9\-\.]+)""", re.VERBOSE)
    lines = data.splitlines()
    line_no = 0
    while line_no < len(lines):
        line = lines[line_no]
        tokens = line.split()
        if tokens:
            if tokens[0] == 'TITL':
                try:
                    info = Res.parse_title(line)
                except (ValueError, IndexError):
                    # A malformed title is not fatal; carry on without it.
                    info = dict()
            elif tokens[0] == 'CELL' and len(tokens) == 8:
                abc = [float(tok) for tok in tokens[2:5]]
                ang = [float(tok) for tok in tokens[5:8]]
            elif tokens[0] == 'SFAC':
                for atom_line in lines[line_no:]:
                    # Test the line being scanned, not the enclosing SFAC
                    # line, so the atom list really stops at END.
                    if atom_line.strip() == 'END':
                        break
                    match = coord_patt.search(atom_line)
                    if match:
                        sp.append(match.group(1))  # 1-indexed
                        cs = match.groups()[2:5]
                        coords.append([float(c) for c in cs])
                    line_no += 1  # Keep the outer cursor in step
        line_no += 1

    return Res(Atoms(symbols=sp,
                     scaled_positions=coords,
                     cell=cellpar_to_cell(list(abc) + list(ang)),
                     pbc=True,
                     info=info),
               info.get('name'),
               info.get('pressure'),
               info.get('energy'),
               info.get('spacegroup'),
               info.get('times_found'))
def gen_STO():
    """Build an ``SrTiO3`` cell (edge 3.94, all angles 90) doubled along c.

    Returns:
        The Atoms object after ``repeat([1, 1, 2])``.
    """
    edge = 3.94
    right_angle = 90
    fractional_sites = [
        (0, 0, 0),
        (0.5, 0.5, 0.5),
        (0, 0.5, 0.5),
        (0.5, 0, 0.5),
        (0.5, 0.5, 0),
    ]
    unit_cell = cellpar_to_cell(
        [edge, edge, edge, right_angle, right_angle, right_angle])
    perovskite = Atoms(symbols='SrTiO3',
                       scaled_positions=fractional_sites,
                       cell=unit_cell)
    return perovskite.repeat([1, 1, 2])
def read_res(lines):
    """
    Reads a res file from a list of lines.

    Args:
        lines (list of str): The lines of the res file.

    Returns:
        tuple: ``(title_items, atoms)`` where ``title_items`` is whatever
        ``parse_titl`` returns for the TITL line (an empty list if there is
        no TITL line) and ``atoms`` is the Atoms object built from the CELL
        and atom records.
    """
    abc = []
    ang = []
    species = []
    coords = []
    info = dict()
    coord_patt = re.compile(
        r"""(\w+)\s+
            ([0-9]+)\s+
            ([0-9\-\.]+)\s+
            ([0-9\-\.]+)\s+
            ([0-9\-\.]+)\s+
            ([0-9\-\.]+)""", re.VERBOSE)
    line_no = 0
    title_items = []
    while line_no < len(lines):
        line = lines[line_no]
        tokens = line.split()
        if tokens:
            if tokens[0] == 'TITL':
                # The parsed title is returned to the caller alongside
                # the structure.
                title_items = parse_titl(line)
            elif tokens[0] == 'CELL' and len(tokens) == 8:
                abc = [float(tok) for tok in tokens[2:5]]
                ang = [float(tok) for tok in tokens[5:8]]
            elif tokens[0] == 'SFAC':
                for atom_line in lines[line_no:]:
                    # Test the line being scanned, not the enclosing SFAC
                    # line, so the atom list really stops at END.
                    if atom_line.strip() == 'END':
                        break
                    match = coord_patt.search(atom_line)
                    if match:
                        species.append(match.group(1))  # 1-indexed
                        xyz = match.groups()[2:5]
                        coords.append([float(c) for c in xyz])
                    line_no += 1  # Keep the outer cursor in step
        line_no += 1

    return title_items, Atoms(symbols=species,
                              scaled_positions=coords,
                              cell=cellpar_to_cell(list(abc) + list(ang)),
                              pbc=True,
                              info=info)
def write_proteindatabank(fileobj, images, write_arrays=True):
    """Write images to PDB-file.

    ``fileobj`` may be a file name (opened with ``paropen``) or an object
    with a ``write`` method.  ``images`` may be a single Atoms-like object
    or a sequence of them.  When ``write_arrays`` is true, per-atom
    'occupancy' and 'bfactor' arrays are written if the image has them;
    otherwise 1.0 and 0.0 are written.
    """
    if isinstance(fileobj, basestring):
        fileobj = paropen(fileobj, 'w')

    if hasattr(images, 'get_positions'):
        images = [images]

    first = images[0]
    rotation = None
    if first.get_pbc().any():
        from ase.geometry import cell_to_cellpar, cellpar_to_cell

        # Rotate into the orientation implied by the cell parameters,
        # since only lengths and angles are stored in the CRYST1 record.
        currentcell = first.get_cell()
        cellpar = cell_to_cellpar(currentcell)
        rotation = np.linalg.solve(currentcell, cellpar_to_cell(cellpar))
        # ignoring Z-value, using P1 since we have all atoms defined explicitly
        cryst_fmt = 'CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1\n'
        fileobj.write(cryst_fmt % (cellpar[0], cellpar[1], cellpar[2],
                                   cellpar[3], cellpar[4], cellpar[5]))

    atom_fmt = ('ATOM %5d %4s MOL 1 %8.3f%8.3f%8.3f%6.2f%6.2f'
                ' %2s \n')

    # RasMol complains if the atom index exceeds 100000. There might
    # be a limit of 5 digit numbers in this field.
    MAXNUM = 100000

    # Symbols (and hence the atom count) are taken from the first image.
    symbols = first.get_chemical_symbols()

    for model_no, atoms in enumerate(images, start=1):
        fileobj.write('MODEL ' + str(model_no) + '\n')
        coords = atoms.get_positions()

        if write_arrays and 'occupancy' in atoms.arrays:
            occupancy = atoms.get_array('occupancy')
        else:
            occupancy = np.ones(len(atoms))
        if write_arrays and 'bfactor' in atoms.arrays:
            bfactor = atoms.get_array('bfactor')
        else:
            bfactor = np.zeros(len(atoms))

        if rotation is not None:
            coords = coords.dot(rotation)

        for a, symbol in enumerate(symbols):
            x, y, z = coords[a]
            fileobj.write(atom_fmt % (a % MAXNUM, symbol, x, y, z,
                                      occupancy[a], bfactor[a],
                                      symbol.upper()))
        fileobj.write('ENDMDL\n')
def motif_cell_fromCrystal(crystal, fill_cell=False):
    """
    ccdc.crystal.Crystal --> np.array shape (m,3), np.array shape (3,3).

    With ``fill_cell=True`` the atoms are taken from
    ``crystal.packing(inclusion='OnlyAtomsIncluded')`` instead of
    ``crystal.molecule``.  Use this when the .cif holds only an asymmetric
    unit that has to be expanded by symmetry operations to fill the cell.
    """
    from ase.geometry import cellpar_to_cell

    cell_parameters = list(crystal.cell_lengths) + list(crystal.cell_angles)
    cell = cellpar_to_cell(cell_parameters)

    if fill_cell:
        source = crystal.packing(inclusion='OnlyAtomsIncluded')
    else:
        source = crystal.molecule

    rows = []
    for atom in source.atoms:
        xyz = atom.coordinates
        rows.append([xyz.x, xyz.y, xyz.z])
    motif = np.array(rows)

    return motif, cell
def _get_cell(text):
    """Extract the cell and periodicity flags from a block of text.

    Two layouts are handled:

    * an explicit lattice matched by the module-level ``_cell_3d`` pattern,
      whose rows are used directly as cell vectors (fully periodic);
    * individual ``lat_a``/``lat_b``/``lat_c`` lengths together with
      ``alpha``/``beta``/``gamma`` angles.  The angles are in degrees: the
      three-vector case passes them unchanged to ``cellpar_to_cell``.

    Returns:
        tuple: ``(cell, pbc)``.  ``cell`` is a 3x3 array, or None when no
        lattice vector was specified; ``pbc`` is a list of three bools.

    Raises:
        AssertionError: if the angles given do not match the lattice
            vectors given (an angle is required exactly when both of the
            vectors it lies between are present).
    """
    # first check whether there is a lattice definition
    cell = np.zeros((3, 3))
    lattice = _cell_3d.findall(text)
    if lattice:
        pbc = [True, True, True]
        for i, row in enumerate(lattice[0].strip().split('\n')):
            cell[i] = [float(x) for x in row.split()]
        return cell, pbc

    pbc = [False, False, False]
    lengths = [None, None, None]
    angles = [None, None, None]
    for row in text.strip().split('\n'):
        row = row.strip().lower()
        for dim, vecname in enumerate(['a', 'b', 'c']):
            if row.startswith('lat_{}'.format(vecname)):
                pbc[dim] = True
                lengths[dim] = float(row.split()[1])
        for i, angle in enumerate(['alpha', 'beta', 'gamma']):
            if row.startswith(angle):
                angles[i] = float(row.split()[1])

    if not np.any(pbc):
        return None, pbc

    # Each angle must be present if and only if the two lattice vectors
    # it lies between are present.
    for i in range(3):
        a, b, c = np.roll(np.array([0, 1, 2]), i)
        if pbc[a] and pbc[b]:
            assert angles[c] is not None
        if angles[c] is not None:
            assert pbc[a] and pbc[b]

    # The easiest case: all three lattice vectors and angles are specified
    if np.all(pbc):
        return cellpar_to_cell(lengths + angles), pbc

    # Next easiest case: exactly one lattice vector has been specified
    if np.sum(pbc) == 1:
        dim = np.argmax(pbc)
        cell[dim, dim] = lengths[dim]
        return cell, pbc

    # Hardest case: two lattice vectors are specified.
    dim1, dim2 = [dim for dim, ipbc in enumerate(pbc) if ipbc]
    angledim = np.argmin(pbc)
    # The angle is in degrees (see the three-vector case above), but the
    # NumPy trigonometric functions expect radians.
    theta = np.radians(angles[angledim])
    cell[dim1, dim1] = lengths[dim1]
    cell[dim2, dim2] = lengths[dim2] * np.sin(theta)
    cell[dim2, dim1] = lengths[dim2] * np.cos(theta)
    return cell, pbc
def read_eon(fileobj):
    """Reads an EON reactant.con file.

    If *fileobj* is the name of a "states" directory created by EON, all
    the structures will be read (via ``read_states``).

    *fileobj* may be a file name or an open file-like object.  A file
    opened here is always closed again, also when parsing fails; a
    file-like object passed in by the caller is left open.
    """
    opened_here = isinstance(fileobj, str)
    if opened_here:
        if os.path.isdir(fileobj):
            return read_states(fileobj)
        f = open(fileobj)
    else:
        f = fileobj

    try:
        comment = f.readline().strip()
        f.readline()  # 0.0000 TIME  (??)
        cell_lengths = f.readline().split()
        cell_angles = f.readline().split()
        # Different order of angles in EON.
        cell_angles = [cell_angles[2], cell_angles[1], cell_angles[0]]
        cellpar = [float(x) for x in cell_lengths + cell_angles]
        f.readline()  # 0 0     (??)
        f.readline()  # 0 0 0   (??)
        ntypes = int(f.readline())  # number of atom types
        natoms = [int(n) for n in f.readline().split()]
        atommasses = [float(m) for m in f.readline().split()]

        symbols = []
        coords = []
        masses = []
        fixed = []
        for n in range(ntypes):
            symbol = f.readline().strip()
            symbols.extend([symbol] * natoms[n])
            masses.extend([atommasses[n]] * natoms[n])
            f.readline()  # Coordinates of Component n
            for _ in range(natoms[n]):
                row = f.readline().split()
                coords.append([float(x) for x in row[:3]])
                # Fourth column is the fixed-atom flag (0 or 1).
                fixed.append(bool(int(row[3])))
    finally:
        if opened_here:
            f.close()

    atoms = Atoms(symbols=symbols,
                  positions=coords,
                  masses=masses,
                  cell=cellpar_to_cell(cellpar),
                  constraint=FixAtoms(mask=fixed),
                  info=dict(comment=comment))
    return atoms
def read_mustem(filename):
    """Import muSTEM input file.

    Reads cell, atom positions, etc. from muSTEM xtl file.

    *filename* may be a file name or an open file-like object.  In both
    cases the file is closed before returning, also when parsing fails.
    """
    from ase import Atoms
    from ase.geometry import cellpar_to_cell

    if isinstance(filename, str):
        f = open(filename)
    else:  # Assume it's a file-like object
        f = filename

    symbols = []
    positions = []
    try:
        # Read comment:
        f.readline()

        # Parse unit cell parameter (only the first three values are used)
        cellpar = [float(tok) for tok in f.readline().strip().split()[:3]]
        cell = cellpar_to_cell(cellpar)

        # beam energy
        f.readline()

        # Number of different type of atoms
        element_number = int(f.readline().strip())

        for _ in range(element_number):
            # Read the element
            symbol = str(f.readline().strip())
            atoms_number = int(f.readline().split()[0])
            # read all the position for each element
            for _ in range(atoms_number):
                row = f.readline()
                positions.append([float(tok) for tok in row.strip().split()])
                symbols.append(symbol)
    finally:
        # Previously the handle leaked whenever a line failed to parse.
        f.close()

    atoms = Atoms(cell=cell, scaled_positions=positions)
    atoms.set_chemical_symbols(symbols)

    return atoms
def set_substrate(atoms,
                  a=None,
                  b=None,
                  c=None,
                  angle_ab=90.0,
                  all_angle_90=False,
                  m=1,
                  fix_volume=False):
    """
    Set the cell parameters of atoms to fit the substrate cell parameters.

    a, b, angle_ab: in-plane cell parameters of the substrate. A value of
        None leaves the corresponding parameter of atoms unchanged.
    c: out-of-plane length; None leaves it unchanged.
    all_angle_90: if True, all the angles will be set to 90.
    m: the multiplier; the lengths are set to a*m, b*m, c*m.
    fix_volume: whether to set c so that a*b*c*sin(gamma) is unchanged.
        Note that this does not always conserve the real volume if the
        angles between the a-b plane and c are changed.

    The scaled positions of the atoms are preserved. atoms is modified in
    place and also returned.
    """
    cellpars = cell_to_cellpar(atoms.get_cell())
    a0, b0, c0 = cellpars[0:3]
    # Remember the original in-plane angle before it is overwritten below.
    gamma0 = cellpars[5]

    if a is not None:
        cellpars[0] = a * m
    if b is not None:
        cellpars[1] = b * m
    if c is not None:
        cellpars[2] = c * m
    if angle_ab is not None:
        cellpars[5] = angle_ab
    if all_angle_90:
        cellpars[3:] = [90, 90, 90]

    if fix_volume:
        # Use the lengths and angle actually stored in cellpars so that
        # this also works when a, b or angle_ab were left as None.
        old_area = a0 * b0 * math.sin(math.radians(gamma0))
        new_area = (cellpars[0] * cellpars[1] *
                    math.sin(math.radians(cellpars[5])))
        cellpars[2] = c0 * old_area / new_area

    cell = cellpar_to_cell(cellpars)
    spos = atoms.get_scaled_positions()
    atoms.set_cell(cell)
    atoms.set_scaled_positions(spos)
    return atoms
def write_v_sim(filename, atoms):
    """Write V_Sim input file.

    Writes the atom positions and unit cell.

    *filename* may be a file name, which is opened for writing and closed
    again here, or a file-like object, which is left open.

    Raises:
        Exception: if ``atoms.pbc`` is not one of fully periodic, fully
            non-periodic or ``[True, False, True]``.
    """
    from ase.geometry import cellpar_to_cell, cell_to_cellpar

    opened_here = isinstance(filename, str)
    if opened_here:
        # Must be opened in write mode; the default mode is read-only.
        f = open(filename, 'w')
    else:  # Assume it's a file-like object
        f = filename

    try:
        # Convert the lattice vectors to triangular matrix by converting
        # to and from a set of lengths and angles
        cell = cellpar_to_cell(cell_to_cellpar(atoms.cell))
        dxx = cell[0, 0]
        dyx, dyy = cell[1, 0:2]
        dzx, dzy, dzz = cell[2, 0:3]

        f.write('===== v_sim input file created using the'
                ' Atomic Simulation Environment (ASE) ====\n')
        f.write('{0} {1} {2}\n'.format(dxx, dyx, dyy))
        f.write('{0} {1} {2}\n'.format(dzx, dzy, dzz))

        # Use v_sim 3.5 keywords to indicate scaled positions, etc.
        f.write('#keyword: reduced\n')
        f.write('#keyword: angstroem\n')
        # np.all instead of np.alltrue, which NumPy 2 no longer provides.
        if np.all(atoms.pbc):
            f.write('#keyword: periodic\n')
        elif not np.any(atoms.pbc):
            f.write('#keyword: freeBC\n')
        elif np.array_equiv(atoms.pbc, [True, False, True]):
            f.write('#keyword: surface\n')
        else:
            raise Exception(
                'Only supported boundary conditions are full PBC,'
                ' no periodic boundary, and surface which is free in y direction'
                ' (i.e. Atoms.pbc = [True, False, True]).')

        # Add atoms (scaled positions)
        for position, symbol in zip(atoms.get_scaled_positions(),
                                    atoms.get_chemical_symbols()):
            f.write('{0} {1} {2} {3}\n'.format(position[0], position[1],
                                               position[2], symbol))
    finally:
        if opened_here:
            f.close()
def test_monoclinic():
    """Test band structure from different variations of monoclinic cells.

    Every cell must be classified as 'monoclinic', and the band-path
    coordinates, label coordinates and energies must agree with those of
    the first cell to within 1e-13.
    """
    import numpy as np
    from ase import Atoms
    from ase.calculators.test import FreeElectrons
    from ase.geometry import (crystal_structure_from_cell, cell_to_cellpar,
                              cellpar_to_cell)
    from ase.dft.kpoints import get_special_points

    mc1 = [[1, 0, 0], [0, 1, 0], [0, 0.2, 1]]
    # Same cell after a round trip through its cell parameters.
    mc2 = cellpar_to_cell(cell_to_cellpar(mc1))
    mc3 = [[1, 0, 0], [0, 1, 0], [-0.2, 0, 1]]
    mc4 = [[1, 0, 0], [-0.2, 1, 0], [0, 0, 1]]

    path = 'GYHCEM1AXH1'

    reference = None  # (coords, labelcoords, energies) of the first cell
    for cell in (mc1, mc2, mc3, mc4):
        a = Atoms(cell=cell, pbc=True)
        a.cell *= 3
        a.calc = FreeElectrons(nvalence=1, kpts={'path': path})
        assert crystal_structure_from_cell(a.cell) == 'monoclinic'

        r = a.cell.reciprocal()
        k = get_special_points(a.cell)['H']
        print(np.dot(k, r))

        a.get_potential_energy()
        bs = a.calc.band_structure()
        coords, labelcoords, labels = bs.get_labels()
        assert ''.join(labels) == path
        e_skn = bs.energies

        if reference is None:
            reference = (coords, labelcoords, e_skn)
            continue

        coords1, labelcoords1, e_skn1 = reference
        deviations = [coords - coords1,
                      labelcoords - labelcoords1,
                      e_skn - e_skn1]
        for d in deviations:
            print(abs(d).max())
            assert abs(d).max() < 1e-13, d
def write_v_sim(filename, atoms):
    """Write V_Sim input file.

    Writes the atom positions and unit cell.

    *filename* may be a file name, which is opened for writing and closed
    again here, or a file-like object, which is left open.

    Raises:
        Exception: if ``atoms.pbc`` is not one of fully periodic, fully
            non-periodic or ``[True, False, True]``.
    """
    from ase.geometry import cellpar_to_cell, cell_to_cellpar

    owns_file = isinstance(filename, str)
    # Write mode is required; a bare open() would give a read-only handle.
    f = open(filename, 'w') if owns_file else filename

    try:
        # Convert the lattice vectors to triangular matrix by converting
        # to and from a set of lengths and angles
        cell = cellpar_to_cell(cell_to_cellpar(atoms.cell))
        dxx = cell[0, 0]
        dyx, dyy = cell[1, 0:2]
        dzx, dzy, dzz = cell[2, 0:3]

        f.write('===== v_sim input file created using the'
                ' Atomic Simulation Environment (ASE) ====\n')
        f.write('{0} {1} {2}\n'.format(dxx, dyx, dyy))
        f.write('{0} {1} {2}\n'.format(dzx, dzy, dzz))

        # Use v_sim 3.5 keywords to indicate scaled positions, etc.
        f.write('#keyword: reduced\n')
        f.write('#keyword: angstroem\n')
        # np.all instead of np.alltrue, which NumPy 2 no longer provides.
        if np.all(atoms.pbc):
            f.write('#keyword: periodic\n')
        elif not np.any(atoms.pbc):
            f.write('#keyword: freeBC\n')
        elif np.array_equiv(atoms.pbc, [True, False, True]):
            f.write('#keyword: surface\n')
        else:
            raise Exception('Only supported boundary conditions are full PBC,'
                            ' no periodic boundary, and surface which is free in y direction'
                            ' (i.e. Atoms.pbc = [True, False, True]).')

        # Add atoms (scaled positions)
        for position, symbol in zip(atoms.get_scaled_positions(),
                                    atoms.get_chemical_symbols()):
            f.write('{0} {1} {2} {3}\n'.format(
                position[0], position[1], position[2], symbol))
    finally:
        if owns_file:
            f.close()
def read_proteindatabank(fileobj, index=-1):
    """Read PDB files.

    The format is assumed to follow the description given in
    http://www.wwpdb.org/documentation/format32/sect8.html and
    http://www.wwpdb.org/documentation/format32/sect9.html.

    *fileobj* may be a file name or an open file-like object.  A file
    opened here is closed again once its lines have been read.

    ATOM/HETATM records that cannot be parsed are skipped with a warning.

    Returns ``images[index]``, where an image is completed by every
    ENDMDL record; a file without ENDMDL yields a single image.
    """
    import warnings

    opened_here = isinstance(fileobj, str)
    if opened_here:
        fileobj = open(fileobj)
    try:
        lines = fileobj.readlines()
    finally:
        if opened_here:
            fileobj.close()

    images = []
    orig = np.identity(3)
    trans = np.zeros(3)
    atoms = Atoms()
    for line in lines:
        if line.startswith('CRYST1'):
            cellpar = [float(word) for word in line[6:54].split()]
            atoms.set_cell(cellpar_to_cell(cellpar))
        for c in range(3):
            if line.startswith('ORIGX' + '123'[c]):
                pars = [float(word) for word in line[10:55].split()]
                orig[c] = pars[:3]
                trans[c] = pars[3]

        if line.startswith('ATOM') or line.startswith('HETATM'):
            try:
                # Atom name is arbitrary and does not necessarily
                # contain the element symbol.  The specification
                # requires the element symbol to be in columns 77+78.
                symbol = line[76:78].strip().lower().capitalize()
                words = line[30:55].split()
                position = np.array([float(words[0]),
                                     float(words[1]),
                                     float(words[2])])
                position = np.dot(orig, position) + trans
                atoms.append(Atom(symbol, position))
            except Exception as ex:
                # Best effort: skip the record, but tell the user, and
                # let KeyboardInterrupt/SystemExit propagate.
                warnings.warn(
                    'Discarding atom when reading PDB file: {}'.format(ex))
        if line.startswith('ENDMDL'):
            images.append(atoms)
            atoms = Atoms()
    if len(images) == 0:
        images.append(atoms)
    return images[index]
def read_pdb(fileobj, index=-1):
    """Read PDB files.

    The format is assumed to follow the description given in
    http://www.wwpdb.org/documentation/format32/sect8.html and
    http://www.wwpdb.org/documentation/format32/sect9.html.

    *fileobj* may be a file name or an open file-like object.  A file
    opened here is closed again once its lines have been read.

    ATOM/HETATM records that cannot be parsed are skipped with a warning.

    Returns ``images[index]``, where an image is completed by every
    ENDMDL record; a file without ENDMDL yields a single image.
    """
    import warnings

    owns_file = isinstance(fileobj, str)
    if owns_file:
        fileobj = open(fileobj)
    try:
        lines = fileobj.readlines()
    finally:
        if owns_file:
            fileobj.close()

    images = []
    orig = np.identity(3)
    trans = np.zeros(3)
    atoms = Atoms()
    for line in lines:
        if line.startswith('CRYST1'):
            cellpar = [float(word) for word in line[6:54].split()]
            atoms.set_cell(cellpar_to_cell(cellpar))
        for c in range(3):
            if line.startswith('ORIGX' + '123'[c]):
                pars = [float(word) for word in line[10:55].split()]
                orig[c] = pars[:3]
                trans[c] = pars[3]

        if line.startswith('ATOM') or line.startswith('HETATM'):
            try:
                # Atom name is arbitrary and does not necessarily contain
                # the element symbol.  The specification requires the
                # element symbol to be in columns 77+78.
                symbol = line[76:78].strip().lower().capitalize()
                words = line[30:55].split()
                position = np.array(
                    [float(words[0]), float(words[1]), float(words[2])])
                position = np.dot(orig, position) + trans
                atoms.append(Atom(symbol, position))
            except Exception as ex:
                # Best effort: skip the record, but tell the user, and
                # let KeyboardInterrupt/SystemExit propagate.
                warnings.warn(
                    'Discarding atom when reading PDB file: {}'.format(ex))
        if line.startswith('ENDMDL'):
            images.append(atoms)
            atoms = Atoms()
    if len(images) == 0:
        images.append(atoms)
    return images[index]
def write_proteindatabank(fileobj, images):
    """Write images to PDB-file.

    ``fileobj`` may be a file name (opened with ``paropen``) or an object
    with a ``write`` method.  ``images`` may be a single Atoms-like object
    or a sequence of them.  Occupancy and B-factor columns are written as
    the constants that are part of the record template.
    """
    if isinstance(fileobj, basestring):
        fileobj = paropen(fileobj, 'w')

    if hasattr(images, 'get_positions'):
        images = [images]

    head = images[0]
    rotation = None
    if head.get_pbc().any():
        from ase.geometry import cell_to_cellpar, cellpar_to_cell

        # Rotate into the orientation implied by the cell parameters,
        # since only lengths and angles are stored in the CRYST1 record.
        currentcell = head.get_cell()
        cellpar = cell_to_cellpar(currentcell)
        rotation = np.linalg.solve(currentcell, cellpar_to_cell(cellpar))
        # ignoring Z-value, using P1 since we have all atoms defined explicitly
        header_template = 'CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1\n'
        fileobj.write(header_template % (cellpar[0], cellpar[1], cellpar[2],
                                         cellpar[3], cellpar[4], cellpar[5]))

    record_template = ('ATOM %5d %4s MOL 1 %8.3f%8.3f%8.3f 1.00 0.00'
                       ' %2s \n')

    # RasMol complains if the atom index exceeds 100000. There might
    # be a limit of 5 digit numbers in this field.
    MAXNUM = 100000

    # Symbols (and hence the atom count) are taken from the first image.
    symbols = head.get_chemical_symbols()

    for model_no, atoms in enumerate(images, start=1):
        fileobj.write('MODEL ' + str(model_no) + '\n')
        xyz = atoms.get_positions()
        if rotation is not None:
            xyz = xyz.dot(rotation)
        for serial, symbol in enumerate(symbols):
            x, y, z = xyz[serial]
            fileobj.write(record_template % (serial % MAXNUM, symbol,
                                             x, y, z, symbol.upper()))
        fileobj.write('ENDMDL\n')
def read_proteindatabank(fileobj, index=-1):
    """Read PDB files.

    *fileobj* may be a file name or an open file-like object.  A file
    opened here is closed again once its lines have been read.

    A CRYST1 record sets the cell and switches periodic boundary
    conditions on.  ATOM/HETATM records that cannot be parsed are skipped
    with a warning.

    Returns ``images[index]``, where an image is completed by every
    ENDMDL record; a file without ENDMDL yields a single image.
    """
    opened_here = isinstance(fileobj, basestring)
    if opened_here:
        fileobj = open(fileobj)
    try:
        lines = fileobj.readlines()
    finally:
        # Only close what was opened here; the caller owns anything else.
        if opened_here:
            fileobj.close()

    images = []
    orig = np.identity(3)
    trans = np.zeros(3)
    atoms = Atoms()
    for line in lines:
        if line.startswith('CRYST1'):
            cellpar = [float(word) for word in line[6:54].split()]
            atoms.set_cell(cellpar_to_cell(cellpar))
            atoms.pbc = True
        for c in range(3):
            if line.startswith('ORIGX' + '123'[c]):
                pars = [float(word) for word in line[10:55].split()]
                orig[c] = pars[:3]
                trans[c] = pars[3]

        if line.startswith('ATOM') or line.startswith('HETATM'):
            try:
                # Atom name is arbitrary and does not necessarily
                # contain the element symbol.  The specification
                # requires the element symbol to be in columns 77+78.
                symbol = line[76:78].strip().lower().capitalize()
                words = line[30:55].split()
                position = np.array(
                    [float(words[0]), float(words[1]), float(words[2])])
                position = np.dot(orig, position) + trans
                atoms.append(Atom(symbol, position))
            except Exception as ex:
                warnings.warn(
                    'Discarding atom when reading PDB file: {}'.format(ex))
        if line.startswith('ENDMDL'):
            images.append(atoms)
            atoms = Atoms()
    if len(images) == 0:
        images.append(atoms)
    return images[index]
def read_stru_to_ase(filename):
    """This function reads the legacy DISCUS stru file format into an ASE
    Atoms object.

    Lines before the ``atoms`` keyword are scanned for a ``cell`` record;
    every non-blank line after it is read as ``symbol x y z`` with scaled
    coordinates.  Commas are treated as whitespace and blank lines are
    skipped.

    :param filename: filename of DISCUS stru file
    :type filename: str
    :return: ASE Atoms object
    :rtype: :class:`ase.Atoms`
    """
    from ase import Atoms
    from ase.geometry import cellpar_to_cell

    with open(filename) as f:
        lines = f.readlines()

    a = b = c = alpha = beta = gamma = 0
    # Defined up front so the final Atoms(...) call cannot hit an unbound
    # name when the file has neither a 'cell' nor an 'atoms' line.
    cell = None

    reading_atom_list = False

    symbols = []
    positions = []

    for raw_line in lines:
        fields = raw_line.replace(',', ' ').split()
        if not fields:
            # Blank line: nothing to parse (previously an IndexError).
            continue
        if not reading_atom_list:  # Wait for 'atoms' line before reading atoms
            if fields[0] == 'cell':
                a, b, c, alpha, beta, gamma = [float(x) for x in fields[1:7]]
                cell = cellpar_to_cell([a, b, c, alpha, beta, gamma])
            if fields[0] == 'atoms':
                if a == 0:
                    print("Cell not found")
                    cell = None
                reading_atom_list = True
        else:
            symbol, x, y, z = fields[:4]
            symbol = symbol.capitalize()
            symbols.append(symbol)
            positions.append([float(x), float(y), float(z)])

    # Return ASE Atoms object
    return Atoms(symbols=symbols, scaled_positions=positions, cell=cell)
def read_proteindatabank(fileobj, index=-1):
    """Read PDB files.

    *fileobj* may be a file name or an open file-like object.  A file
    opened here is closed again once its lines have been read.

    A CRYST1 record sets the cell and switches periodic boundary
    conditions on.  ATOM/HETATM records that cannot be parsed are skipped
    with a warning.

    Returns ``images[index]``, where an image is completed by every
    ENDMDL record; a file without ENDMDL yields a single image.
    """
    owns_file = isinstance(fileobj, basestring)
    if owns_file:
        fileobj = open(fileobj)
    try:
        lines = fileobj.readlines()
    finally:
        # Only close what was opened here; the caller owns anything else.
        if owns_file:
            fileobj.close()

    images = []
    orig = np.identity(3)
    trans = np.zeros(3)
    atoms = Atoms()
    for line in lines:
        if line.startswith('CRYST1'):
            cellpar = [float(word) for word in line[6:54].split()]
            atoms.set_cell(cellpar_to_cell(cellpar))
            atoms.pbc = True
        for c in range(3):
            if line.startswith('ORIGX' + '123'[c]):
                pars = [float(word) for word in line[10:55].split()]
                orig[c] = pars[:3]
                trans[c] = pars[3]

        if line.startswith('ATOM') or line.startswith('HETATM'):
            try:
                # Atom name is arbitrary and does not necessarily
                # contain the element symbol.  The specification
                # requires the element symbol to be in columns 77+78.
                symbol = line[76:78].strip().lower().capitalize()
                words = line[30:55].split()
                position = np.array([float(words[0]),
                                     float(words[1]),
                                     float(words[2])])
                position = np.dot(orig, position) + trans
                atoms.append(Atom(symbol, position))
            except Exception as ex:
                warnings.warn('Discarding atom when reading PDB file: {}'
                              .format(ex))
        if line.startswith('ENDMDL'):
            images.append(atoms)
            atoms = Atoms()
    if len(images) == 0:
        images.append(atoms)
    return images[index]
def __init__(self, cell_dims, nx=50, ny=50, nz=50, pbc=None, pbc_strict_mode=True):
    """Build a regular mesh spanning a cell.

    Parameters:

    cell_dims : array of shape (6,) or (3, 3)
        Either cell parameters, which are converted with
        ``cellpar_to_cell``, or the cell matrix itself.
    nx, ny, nz : int
        Number of mesh points along each fractional axis.
    pbc : sequence of three bools, optional
        Periodic boundary flags, stored as given.  Defaults to a new
        ``[True, True, True]`` list for every instance.
    pbc_strict_mode : bool
        Stored as ``self.strict_mode``.

    Raises:
        Exception: if ``cell_dims`` has neither shape (6,) nor (3, 3).
    """
    import numpy as np
    from ase.geometry import Cell, cellpar_to_cell

    if np.shape(cell_dims) == (6,):
        # cellpar_to_cell returns the cell matrix, not cell parameters.
        cell_matrix = cellpar_to_cell(cell_dims)
        self.Cell = Cell(cell_matrix)
    elif np.shape(cell_dims) == (3, 3):
        self.Cell = Cell(cell_dims)
    else:
        raise Exception("Specify cell parameters as a (6,) (x,y,z,alpha,beta,gamma) or (3x3) array.")

    self.nx, self.ny, self.nz = nx, ny, nz

    # A fresh list per instance: a mutable default argument would be
    # shared between all instances created without an explicit pbc.
    self.pbc = [True, True, True] if pbc is None else pbc

    self.X = np.linspace(0, 1, self.nx)
    self.Y = np.linspace(0, 1, self.ny)
    self.Z = np.linspace(0, 1, self.nz)

    # Define meshgrid in fractional coordinates.
    self.frac_xx, self.frac_yy, self.frac_zz = np.meshgrid(
        self.X, self.Y, self.Z, indexing='xy')

    # Project from fractional coordinates to cartesian.
    stack_mesh = np.stack((self.frac_xx, self.frac_yy, self.frac_zz), axis=-1)
    stack_mesh = np.einsum('ji,abcj->iabc', self.Cell.array, stack_mesh)
    self.xx, self.yy, self.zz = stack_mesh[0], stack_mesh[1], stack_mesh[2]

    # Allows easy conversion from cart -> frac coordinates.
    self.inverse_cell_array = np.linalg.inv(self.Cell.array)

    # Define the Cell parameters as (a,b,c,alpha,beta,gamma) array without
    # calling Cell.cellpar() method everytime.
    self.cellpar = self.Cell.cellpar()

    # Enforces Mesh and Atoms pbc match - if off, ignores mismatches.
    self.strict_mode = pbc_strict_mode
def normalize(atoms, set_origin=False):
    """
    Return a normalized copy of atoms.

    - values are first passed through ``force_near_0``;
    - with ``set_origin``, the atom closest to the origin is moved onto it;
    - the cell is rebuilt as cell -> cellpar -> cell;
    - all scaled positions are wrapped with ``mod 1``.

    The input atoms object itself is not modified here; a copy is made
    before anything else.
    """
    result = force_near_0(atoms.copy())

    if set_origin:
        by_distance = sorted(result.get_positions(), key=np.linalg.norm)
        result.translate(-by_distance[0])

    # Take the scaled positions before the cell is replaced so that they
    # are carried over to the rebuilt cell.
    scaled = result.get_scaled_positions()
    result.set_cell(cellpar_to_cell(cell_to_cellpar(result.get_cell())))
    result.set_scaled_positions(np.mod(scaled, 1.0))
    return result
def read_jsv(f):
    """Reads a JSV file.

    The first line names the file type ("asymmetric_unit_cell",
    "full_unit_cell" or "cartesian_cell").  Every following line of the
    form ``[tag] arguments`` is interpreted; other lines are ignored.

    Raises:
        ValueError: on an unknown tag or an unknown file type.
    """
    natom = nbond = npoly = 0
    symbols = []
    labels = []
    cellpar = basis = title = bonds = poly = origin = shell_numbers = None
    spacegroup = 1

    headline = f.readline().strip()

    while True:
        raw = f.readline()
        if not raw:
            break  # end of file
        m = re.match(r"^\[([^]]+)\]\s*(.*)", raw.strip())
        if m is None:
            continue  # not a "[tag] ..." line (this includes blank lines)

        tag = m.group(1).lower()
        remainder = m.group(2)
        args = remainder.split()

        if tag == "cell":
            cellpar = [float(x) for x in args]
        elif tag == "natom":
            natom = int(args[0])
        elif tag == "nbond":
            nbond = int(args[0])
            # optional margin of the bondlengths
        elif tag == "npoly":
            npoly = int(args[0])
        elif tag == "space_group":
            spacegroup = Spacegroup(*(int(x) for x in args))
        elif tag == "title":
            title = remainder
        elif tag == "atoms":
            symbols = []
            basis = np.zeros((natom, 3), dtype=float)
            shell_numbers = -np.ones((natom,), dtype=int)  # float?
            # The natom lines that follow hold one atom each.
            for row in range(natom):
                tokens = f.readline().strip().split()
                labels.append(tokens[0])
                symbols.append(ase.data.chemical_symbols[int(tokens[1])])
                basis[row] = [float(x) for x in tokens[2:5]]
                if len(tokens) > 5:
                    shell_numbers[row] = float(tokens[5])  # float?
        elif tag == "bonds":
            # Bond records are skipped, not parsed.
            for _ in range(nbond):
                f.readline()
            bonds = NotImplemented
        elif tag == "poly":
            # Polyhedra records are skipped, not parsed.
            for _ in range(npoly):
                f.readline()
            poly = NotImplemented
        elif tag == "origin":
            origin = NotImplemented
        else:
            raise ValueError('Unknown tag: "%s"' % tag)

    if headline == "asymmetric_unit_cell":
        atoms = crystal(symbols=symbols,
                        basis=basis,
                        spacegroup=spacegroup,
                        cellpar=cellpar)
    elif headline == "full_unit_cell":
        atoms = ase.Atoms(symbols=symbols,
                          scaled_positions=basis,
                          cell=cellpar_to_cell(cellpar))
        atoms.info["spacegroup"] = Spacegroup(spacegroup)
    elif headline == "cartesian_cell":
        atoms = ase.Atoms(symbols=symbols,
                          positions=basis,
                          cell=cellpar_to_cell(cellpar))
        atoms.info["spacegroup"] = Spacegroup(spacegroup)
    else:
        raise ValueError('Invalid JSV file type: "%s"' % headline)

    atoms.info["title"] = title
    atoms.info["labels"] = labels
    # Optional entries are stored only if the matching tag was seen.
    optional_entries = (("bonds", bonds),
                        ("poly", poly),
                        ("origin", origin),
                        ("shell_numbers", shell_numbers))
    for key, value in optional_entries:
        if value is not None:
            atoms.info[key] = value

    return atoms
def crystal(symbols=None, basis=None, spacegroup=1, setting=1,
            cell=None, cellpar=None,
            ab_normal=(0, 0, 1), a_direction=None, size=(1, 1, 1),
            onduplicates='warn', symprec=0.001,
            pbc=True, primitive_cell=False, **kwargs):
    """Create an Atoms instance for a conventional unit cell of a
    space group.

    Parameters:

    symbols : str | sequence of str | sequence of Atom | Atoms
        Element symbols of the unique sites.  Can either be a string
        formula or a sequence of element symbols. E.g. ('Na', 'Cl')
        and 'NaCl' are equivalent.  Can also be given as a sequence of
        Atom objects or an Atoms object.
    basis : list of scaled coordinates
        Positions of the unique sites corresponding to symbols given
        either as scaled positions or through an atoms instance.  Not
        needed if *symbols* is a sequence of Atom objects or an Atoms
        object.
    spacegroup : int | string | Spacegroup instance
        Space group given either as its number in International Tables
        or as its Hermann-Mauguin symbol.
    setting : 1 | 2
        Space group setting.
    cell : 3x3 matrix
        Unit cell vectors.
    cellpar : [a, b, c, alpha, beta, gamma]
        Cell parameters with angles in degree. Is not used when `cell`
        is given.
    ab_normal : vector
        Is used to define the orientation of the unit cell relative
        to the Cartesian system when `cell` is not given. It is the
        normal vector of the plane spanned by a and b.
    a_direction : vector
        Defines the orientation of the unit cell a vector. a will be
        parallel to the projection of `a_direction` onto the a-b plane.
    size : 3 positive integers
        How many times the conventional unit cell should be repeated
        in each direction.
    onduplicates : 'keep' | 'replace' | 'warn' | 'error'
        Action if `basis` contain symmetry-equivalent positions:
            'keep'    - ignore additional symmetry-equivalent positions
            'replace' - replace
            'warn'    - like 'keep', but issue an UserWarning
            'error'   - raises a SpacegroupValueError
    symprec : float
        Minimum "distance" between two sites in scaled coordinates
        before they are counted as the same site.
    pbc : one or three bools
        Periodic boundary conditions flags.  Examples: True,
        False, 0, 1, (1, 1, 0), (True, False, False).  Default
        is True.
    primitive_cell : bool
        Whether to return the primitive instead of the conventional
        unit cell.

    Keyword arguments:

    All additional keyword arguments are passed on to the Atoms
    constructor.  Currently, probably the most useful additional
    keyword arguments are `info`, `constraint` and `calculator`.

    Examples:

    Two diamond unit cells (space group number 227)

    >>> diamond = crystal('C', [(0,0,0)], spacegroup=227,
    ...     cellpar=[3.57, 3.57, 3.57, 90, 90, 90], size=(2,1,1))
    >>> ase.view(diamond)  # doctest: +SKIP

    A CoSb3 skutterudite unit cell containing 32 atoms

    >>> skutterudite = crystal(('Co', 'Sb'),
    ...     basis=[(0.25,0.25,0.25), (0.0, 0.335, 0.158)],
    ...     spacegroup=204, cellpar=[9.04, 9.04, 9.04, 90, 90, 90])
    >>> len(skutterudite)
    32
    """
    sg = Spacegroup(spacegroup, setting)

    # A sequence of Atom objects is turned into an Atoms object first.
    if (not isinstance(symbols, str) and
            hasattr(symbols, '__getitem__') and
            len(symbols) > 0 and
            isinstance(symbols[0], ase.Atom)):
        symbols = ase.Atoms(symbols)
    if isinstance(symbols, ase.Atoms):
        basis = symbols
        symbols = basis.get_chemical_symbols()

    if isinstance(basis, ase.Atoms):
        basis_coords = basis.get_scaled_positions()
        if cell is None and cellpar is None:
            cell = basis.cell
        if symbols is None:
            symbols = basis.get_chemical_symbols()
    else:
        # No copy=False here: with NumPy >= 2 that raises whenever a copy
        # is unavoidable, which is always the case for list input.
        basis_coords = np.array(basis, dtype=float, ndmin=2)

    sites, kinds = sg.equivalent_sites(basis_coords,
                                       onduplicates=onduplicates,
                                       symprec=symprec)
    symbols = parse_symbols(symbols)
    # kinds[i] is the index of the unique site that sites[i] stems from.
    symbols = [symbols[i] for i in kinds]
    if cell is None:
        cell = cellpar_to_cell(cellpar, ab_normal, a_direction)

    info = dict(spacegroup=sg)
    if primitive_cell:
        info['unit_cell'] = 'primitive'
    else:
        info['unit_cell'] = 'conventional'

    # Entries of a user-supplied info dict take precedence.
    if 'info' in kwargs:
        info.update(kwargs['info'])
    kwargs['info'] = info

    atoms = ase.Atoms(symbols,
                      scaled_positions=sites,
                      cell=cell,
                      pbc=pbc,
                      **kwargs)

    # Carry over any extra per-atom arrays of the basis to the new sites.
    if isinstance(basis, ase.Atoms):
        for name in basis.arrays:
            if not atoms.has(name):
                array = basis.get_array(name)
                atoms.new_array(name, [array[i] for i in kinds],
                                dtype=array.dtype, shape=array.shape[1:])

    if primitive_cell:
        from ase.build import cut
        prim_cell = sg.scaled_primitive_cell
        atoms = cut(atoms, a=prim_cell[0], b=prim_cell[1], c=prim_cell[2])

    if size != (1, 1, 1):
        atoms = atoms.repeat(size)
    return atoms
def __getitem__(self, i=-1):
    """Return frame ``i`` of the trajectory as an ``ase.Atoms`` object.

    A slice returns a list with one ``Atoms`` object per selected frame.
    An index outside ``0 <= i < len`` is shifted by the number of frames
    once (so negative indices count from the end); if it is still out of
    range the file is closed and ``IndexError`` is raised.
    """
    self._open()

    if isinstance(i, slice):
        frame_ids = range(*i.indices(self._len()))
        return [self[frame] for frame in frame_ids]

    n_frames = self._len()
    if not 0 <= i < n_frames:
        # Shift once by the trajectory length, then validate.
        i = n_frames + i
        if not 0 <= i < n_frames:
            self._close()
            raise IndexError('Trajectory index out of range.')
        return self[i]

    # A stored cell length of (almost) zero marks a non-periodic direction.
    lengths = np.array(self.nc.variables[self._cell_lengths_var][i][:])
    pbc = np.abs(lengths > 1e-6)

    # Cell origin: taken from the file when present, otherwise zero.
    if self._has_variable(self._cell_origin_var):
        origin = np.array(self.nc.variables[self._cell_origin_var][i][:])
    else:
        origin = np.zeros([3], dtype=float)

    # Per-atom ordering: explicit index variable (plus offset) or identity.
    if self._has_variable(self.index_var):
        order = np.array(self.nc.variables[self.index_var][i][:])
        order = order + self.index_offset
    else:
        order = np.arange(self.n_atoms)

    # Element numbers; default to all ones when the file stores none.
    self.numbers = self._get_data(self._numbers_var, i, exc=False)
    self.numbers = (np.ones(self.n_atoms, dtype=int)
                    if self.numbers is None
                    else np.array(self.numbers[order]))
    if self.types_to_numbers is not None:
        self.numbers = self.types_to_numbers[self.numbers]
    self.masses = atomic_masses[self.numbers]

    positions = np.array(self._get_data(self._positions_var, i)[order])

    # Non-periodic directions get a cell that just encloses the atoms.
    for axis in np.arange(3)[np.logical_not(pbc)]:
        column = positions[:, axis]
        origin[axis] = column.min()
        lengths[axis] = column.max() - origin[axis]

    # Build the cell from three lengths followed by the stored angles.
    angles = self.nc.variables[self._cell_angles_var][i]
    cell = cellpar_to_cell(list(lengths) + list(angles))

    # Velocities, when present, are turned into momenta via the masses.
    momenta = self._get_data(self._velocities_var, i, exc=False)
    if momenta is not None:
        momenta = momenta[order] * self.masses.reshape(-1, 1)

    # Extra per-frame attributes end up in the info dictionary.
    info = {name: np.array(self.nc.variables[name][i])
            for name in self.extra_per_frame_atts}

    atoms = ase.Atoms(positions=positions - origin.reshape(1, -1),
                      numbers=self.numbers,
                      cell=cell,
                      momenta=momenta,
                      masses=self.masses,
                      pbc=pbc,
                      info=info)

    # Extra arrays: per-frame ones are reordered, per-file ones are not.
    for name in self.extra_per_frame_vars:
        atoms.set_array(name, self.nc.variables[name][i][order])
    for name in self.extra_per_file_vars:
        atoms.set_array(name, self.nc.variables[name][:])

    self._close()
    return atoms
def read_v_sim(fd):
    """Import V_Sim input file.

    Reads cell, atom positions, etc. from v_sim ascii file

    Parameters:

    fd : file-like object
        Open text file positioned at the start of the V_Sim data. The
        first line is skipped as a comment and the next two lines are
        read as the box definition.

    Returns an ``Atoms`` object.

    Raises ``NotImplementedError`` if the file declares the ``surface``
    or ``freeBC`` keyword.
    """
    from ase import Atoms, units
    from ase.geometry import cellpar_to_cell
    import re

    bohr_keywords = ("bohr", "bohrd0", "atomic", "atomicd0")

    def length_unit(seen_keywords):
        # Lengths are scaled by units.Bohr as soon as any of the
        # atomic-unit keywords has been encountered.
        if any(key in seen_keywords for key in bohr_keywords):
            return units.Bohr
        return 1.0

    # Read comment:
    fd.readline()

    # The box definition is spread over the next two lines.
    box = [float(value)
           for value in (fd.readline() + ' ' + fd.readline()).split()]

    keywords = []
    positions = []
    symbols = []

    re_comment = re.compile(r'^\s*[#!]')
    re_node = re.compile(r'^\s*\S+\s+\S+\s+\S+\s+\S+')

    for line in iter(fd.readline, ''):  # '' signals EOF
        comment = re_comment.match(line)
        if comment is not None:
            # remove comment character at the beginning of line
            line = line[comment.end():].replace(',', ' ').lower()
            if line[:8] == "keyword:":
                keywords.extend(line[8:].split())
        elif re_node.match(line):
            # The unit depends on the keywords seen *so far*, exactly as
            # the file is read top to bottom.
            unit = 1.0 if "reduced" in keywords else length_unit(keywords)
            fields = line.split()
            positions.append([unit * float(fields[0]),
                              unit * float(fields[1]),
                              unit * float(fields[2])])
            symbols.append(fields[3])

    # Keywords are stored lower-cased (see above), so the free boundary
    # condition keyword has to be compared in lower case as well.
    if ("surface" in keywords) or ("freebc" in keywords):
        raise NotImplementedError

    # create atoms object based on the information
    if "angdeg" in keywords:
        # NOTE(review): the box is passed on unscaled here even when an
        # atomic-unit keyword is present -- confirm this is intended.
        cell = cellpar_to_cell(box)
    else:
        # Six box values fill the lower triangle of the cell matrix.
        cell = np.zeros((3, 3))
        cell.flat[[0, 3, 4, 6, 7, 8]] = box[:6]
        cell *= length_unit(keywords)

    if "reduced" in keywords:
        atoms = Atoms(cell=cell, scaled_positions=positions)
    else:
        atoms = Atoms(cell=cell, positions=positions)

    atoms.set_chemical_symbols(symbols)
    return atoms
def crystal(symbols=None, basis=None, occupancies=None, spacegroup=1,
            setting=1, cell=None, cellpar=None, ab_normal=(0, 0, 1),
            a_direction=None, size=(1, 1, 1), onduplicates='warn',
            symprec=0.001, pbc=True, primitive_cell=False, **kwargs):
    """Create an Atoms instance for a conventional unit cell of a
    space group.

    Parameters:

    symbols : str | sequence of str | sequence of Atom | Atoms
        Element symbols of the unique sites.  Can either be a string
        formula or a sequence of element symbols. E.g. ('Na', 'Cl')
        and 'NaCl' are equivalent.  Can also be given as a sequence of
        Atom objects or an Atoms object.
    basis : list of scaled coordinates
        Positions of the unique sites corresponding to symbols given
        either as scaled positions or through an atoms instance.  Not
        needed if *symbols* is a sequence of Atom objects or an Atoms
        object.
    occupancies : list of site occupancies
        Occupancies of the unique sites. Defaults to 1.0 and thus no mixed
        occupancies are considered if not explicitly asked for. If
        occupancies are given, the most dominant species will yield the
        atomic number.
    spacegroup : int | string | Spacegroup instance
        Space group given either as its number in International Tables
        or as its Hermann-Mauguin symbol.
    setting : 1 | 2
        Space group setting.
    cell : 3x3 matrix
        Unit cell vectors.
    cellpar : [a, b, c, alpha, beta, gamma]
        Cell parameters with angles in degree. Is not used when `cell`
        is given.
    ab_normal : vector
        Is used to define the orientation of the unit cell relative
        to the Cartesian system when `cell` is not given. It is the
        normal vector of the plane spanned by a and b.
    a_direction : vector
        Defines the orientation of the unit cell a vector. a will be
        parallel to the projection of `a_direction` onto the a-b plane.
    size : 3 positive integers
        How many times the conventional unit cell should be repeated
        in each direction.
    onduplicates : 'keep' | 'replace' | 'warn' | 'error'
        Action if `basis` contain symmetry-equivalent positions:
            'keep'    - ignore additional symmetry-equivalent positions
            'replace' - replace
            'warn'    - like 'keep', but issue an UserWarning
            'error'   - raises a SpacegroupValueError
    symprec : float
        Minimum "distance" between two sites in scaled coordinates
        before they are counted as the same site.
    pbc : one or three bools
        Periodic boundary conditions flags.  Examples: True,
        False, 0, 1, (1, 1, 0), (True, False, False).  Default
        is True.
    primitive_cell : bool
        Whether to return the primitive instead of the conventional
        unit cell.

    Keyword arguments:

    All additional keyword arguments are passed on to the Atoms
    constructor.  Currently, probably the most useful additional
    keyword arguments are `info`, `constraint` and `calculator`.

    Examples:

    Two diamond unit cells (space group number 227)

    >>> diamond = crystal('C', [(0,0,0)], spacegroup=227,
    ...     cellpar=[3.57, 3.57, 3.57, 90, 90, 90], size=(2,1,1))
    >>> ase.view(diamond)  # doctest: +SKIP

    A CoSb3 skutterudite unit cell containing 32 atoms

    >>> skutterudite = crystal(('Co', 'Sb'),
    ...     basis=[(0.25,0.25,0.25), (0.0, 0.335, 0.158)],
    ...     spacegroup=204, cellpar=[9.04, 9.04, 9.04, 90, 90, 90])
    >>> len(skutterudite)
    32
    """
    sg = Spacegroup(spacegroup, setting)

    # A non-string sequence of Atom objects is promoted to an Atoms object.
    # NOTE(review): `basestring` has to be supplied by the surrounding
    # module (it is not a Python 3 builtin) -- confirm.
    if (not isinstance(symbols, basestring) and
            hasattr(symbols, '__getitem__') and
            len(symbols) > 0 and
            isinstance(symbols[0], ase.Atom)):
        symbols = ase.Atoms(symbols)
    if isinstance(symbols, ase.Atoms):
        basis = symbols
        symbols = basis.get_chemical_symbols()

    if isinstance(basis, ase.Atoms):
        basis_coords = basis.get_scaled_positions()
        if cell is None and cellpar is None:
            cell = basis.cell
        if symbols is None:
            symbols = basis.get_chemical_symbols()
    else:
        # No ``copy=False`` here: under NumPy 2 that flag raises whenever a
        # copy cannot be avoided, which is always the case for list input.
        basis_coords = np.array(basis, dtype=float, ndmin=2)

    if occupancies is not None:
        # For every unique site collect {symbol: occupancy} of the site
        # itself and of all other basis sites closer than ``symprec``.
        occupancies_dict = {}
        for index, coord in enumerate(basis_coords):
            dist = spatial.distance.cdist(coord.reshape(1, 3), basis_coords)
            occ = {symbols[index]: occupancies[index]}
            for index_dist in np.flatnonzero(dist < symprec):
                if index_dist != index:
                    occ[symbols[index_dist]] = occupancies[index_dist]
            occupancies_dict[index] = occ

    sites, kinds = sg.equivalent_sites(basis_coords,
                                       onduplicates=onduplicates,
                                       symprec=symprec)

    # this is needed to handle deuterium masses
    masses = None
    if 'masses' in kwargs:
        masses = kwargs.pop('masses')[kinds]

    symbols = parse_symbols(symbols)

    if occupancies is None:
        symbols = [symbols[i] for i in kinds]
    else:
        # make sure that we put the dominant species there
        symbols = [
            sorted(occupancies_dict[i].items(), key=lambda x: x[1])[-1][0]
            for i in kinds
        ]

    if cell is None:
        cell = cellpar_to_cell(cellpar, ab_normal, a_direction)

    info = dict(spacegroup=sg)
    info['unit_cell'] = 'primitive' if primitive_cell else 'conventional'

    # User-supplied info entries take precedence over the defaults above.
    if 'info' in kwargs:
        info.update(kwargs['info'])

    if occupancies is not None:
        info['occupancy'] = occupancies_dict

    kwargs['info'] = info

    atoms = ase.Atoms(symbols,
                      scaled_positions=sites,
                      cell=cell,
                      pbc=pbc,
                      masses=masses,
                      **kwargs)

    # if all occupancies are 1, no partial occupancy present
    # (``is not None`` instead of truthiness so NumPy arrays work too;
    # an empty sequence still results in no tags being set).
    if occupancies is not None:
        if not all(occ == 1 for occ in occupancies):
            # use tags to identify sites, and in particular the occupancy
            atoms.set_tags(kinds)

    # Carry over extra per-atom arrays of an Atoms basis to all sites.
    if isinstance(basis, ase.Atoms):
        for name in basis.arrays:
            if not atoms.has(name):
                array = basis.get_array(name)
                atoms.new_array(name, [array[i] for i in kinds],
                                dtype=array.dtype, shape=array.shape[1:])

    if primitive_cell:
        from ase.build import cut
        prim_cell = sg.scaled_primitive_cell

        # Preserve calculator if present:
        calc = atoms.calc
        atoms = cut(atoms, a=prim_cell[0], b=prim_cell[1], c=prim_cell[2])
        atoms.calc = calc

    if size != (1, 1, 1):
        atoms = atoms.repeat(size)
    return atoms
"""Test band structure from different variations of hexagonal cells."""
# NOTE(review): the docstring says "hexagonal", but the cells below are
# named mc1..mc3 and have a single tilted vector -- confirm the wording.
import numpy as np
from ase import Atoms
from ase.calculators.test import FreeElectrons
from ase.geometry import crystal_structure_from_cell, cell_to_cellpar, cellpar_to_cell
from ase.dft.kpoints import get_special_points

# Three descriptions of a cell: an explicit matrix, the same cell after a
# round trip through cell parameters, and a matrix tilted along another axis.
mc1 = [[1, 0, 0], [0, 1, 0], [0, 0.2, 1]]
par = cell_to_cellpar(mc1)
mc2 = cellpar_to_cell(par)
mc3 = [[1, 0, 0], [0, 1, 0], [-0.2, 0, 1]]

# Band path given as a string of special-point labels.
path = 'GYHCEM1AXH1'

firsttime = True
for cell in [mc1, mc2, mc3]:
    a = Atoms(cell=cell, pbc=True)
    a.cell *= 3  # scale the cell by a factor of three
    a.calc = FreeElectrons(nvalence=1, kpts={'path': path})
    print(crystal_structure_from_cell(a.cell))
    r = a.get_reciprocal_cell()
    k = get_special_points(a.cell)['H']
    # Special point 'H' multiplied with the reciprocal cell.
    print(np.dot(k, r))
    a.get_potential_energy()
    bs = a.calc.band_structure()
    coords, labelcoords, labels = bs.get_labels()
    # The labels reported by the band structure must reproduce the path.
    assert ''.join(labels) == path
    e_skn = bs.energies
    # bs.plot()
    if firsttime:
        # Remember the results of the first cell as the reference.
        coords1 = coords
        labelcoords1 = labelcoords
def read_proteindatabank(fileobj, index=-1, read_arrays=True):
    """Read PDB files.

    Parameters:

    fileobj : str | file-like object
        Path of the PDB file or an object providing ``readlines()``.
        A file opened here from a path is closed again before parsing;
        a file object passed in by the caller is left open.
    index : int | slice
        Selects which configuration(s) of the list of images to return.
    read_arrays : bool
        If true, occupancy, bfactor, residue names, atom types and
        residue numbers are attached as per-atom arrays when their
        length matches the number of atoms.

    Returns ``images[index]`` where ``images`` holds one ``Atoms`` object
    per configuration (a configuration ends at a line starting with
    'END'; a file without such a line yields a single configuration).
    """
    opened_here = isinstance(fileobj, str)
    if opened_here:
        fileobj = open(fileobj)
    try:
        lines = fileobj.readlines()
    finally:
        # Only release handles this function created itself.
        if opened_here:
            fileobj.close()

    images = []
    orig = np.identity(3)
    trans = np.zeros(3)
    occ = []
    bfactor = []
    residuenames = []
    residuenumbers = []
    atomtypes = []
    symbols = []
    positions = []
    cell = None
    pbc = None

    def build_atoms():
        # Assemble an Atoms object from the data collected for the
        # current configuration (reads the enclosing function's lists).
        atoms = Atoms(symbols=symbols,
                      cell=cell, pbc=pbc,
                      positions=positions)

        if not read_arrays:
            return atoms

        info = {'occupancy': occ,
                'bfactor': bfactor,
                'residuenames': residuenames,
                'atomtypes': atomtypes,
                'residuenumbers': residuenumbers}
        for name, array in info.items():
            if len(array) == 0:
                continue
            if len(array) != len(atoms):
                warnings.warn('Length of {} array, {}, '
                              'different from number of atoms {}'.
                              format(name, len(array), len(atoms)))
            else:
                atoms.set_array(name, np.array(array))
        return atoms

    for line in lines:
        if line.startswith('CRYST1'):
            cellpar = [float(line[6:15]),  # a
                       float(line[15:24]),  # b
                       float(line[24:33]),  # c
                       float(line[33:40]),  # alpha
                       float(line[40:47]),  # beta
                       float(line[47:54])]  # gamma
            cell = cellpar_to_cell(cellpar)
            pbc = True
        for c in range(3):
            if line.startswith('ORIGX' + '123'[c]):
                orig[c] = [float(line[10:20]),
                           float(line[20:30]),
                           float(line[30:40])]
                trans[c] = float(line[45:55])

        if line.startswith('ATOM') or line.startswith('HETATM'):
            # Atom name is arbitrary and does not necessarily
            # contain the element symbol.  The specification
            # requires the element symbol to be in columns 77+78.
            # Fall back to Atom name for files that do not follow
            # the spec, e.g. packmol.

            # line_info = symbol, name, altloc, resname, coord, occupancy,
            #             bfactor, resseq
            line_info = read_atom_line(line)

            try:
                symbol = label_to_symbol(line_info[0])
            except (KeyError, IndexError):
                symbol = label_to_symbol(line_info[1])

            position = np.dot(orig, line_info[4]) + trans
            atomtypes.append(line_info[1])
            residuenames.append(line_info[3])
            if line_info[5] is not None:
                occ.append(line_info[5])
            bfactor.append(line_info[6])
            residuenumbers.append(line_info[7])

            symbols.append(symbol)
            positions.append(position)

        if line.startswith('END'):
            # End of configuration reached
            # According to the latest PDB file format (v3.30),
            # this line should start with 'ENDMDL' (not 'END'),
            # but in this way PDB trajectories from e.g. CP2K
            # are supported (also VMD supports this format).
            atoms = build_atoms()
            images.append(atoms)
            # Start the next configuration with empty per-atom data.
            # residuenumbers must be reset as well, otherwise it keeps
            # growing and no longer matches the atom count of later frames.
            occ = []
            bfactor = []
            residuenames = []
            residuenumbers = []
            atomtypes = []
            symbols = []
            positions = []
            cell = None
            pbc = None

    if len(images) == 0:
        # Single configuration without a terminating 'END' line.
        atoms = build_atoms()
        images.append(atoms)
    return images[index]
def read_jsv(f):
    """Reads a JSV file.

    The first line names the file type ('asymmetric_unit_cell',
    'full_unit_cell' or 'cartesian_cell'); the remaining lines are parsed
    as ``[tag] arguments`` records.  Lines that do not look like a tag
    record are ignored.  Raises ValueError for an unknown tag or an
    unknown file type.
    """
    natom = nbond = npoly = 0
    symbols = []
    labels = []
    cellpar = basis = title = bonds = poly = origin = shell_numbers = None
    spacegroup = 1

    headline = f.readline().strip()

    while True:
        raw = f.readline()
        if not raw:
            break  # end of file
        record = re.match(r'^\[([^]]+)\]\s*(.*)', raw.strip())
        if record is None:
            # Also covers blank lines: they cannot match the pattern.
            continue

        tag = record.group(1).lower()
        rest = record.group(2)
        args = rest.split()

        if tag == 'cell':
            cellpar = [float(x) for x in args]
        elif tag == 'natom':
            natom = int(args[0])
        elif tag == 'nbond':
            nbond = int(args[0])
            # optional margin of the bondlengths
        elif tag == 'npoly':
            npoly = int(args[0])
        elif tag == 'space_group':
            spacegroup = Spacegroup(*[int(x) for x in args])
        elif tag == 'title':
            title = rest
        elif tag == 'atoms':
            # One line per atom: label, atomic number, three coordinates
            # and an optional sixth column stored as shell number.
            symbols = []
            basis = np.zeros((natom, 3), dtype=float)
            shell_numbers = np.full((natom, ), -1, dtype=int)  # float?
            for row in range(natom):
                tokens = f.readline().strip().split()
                labels.append(tokens[0])
                symbols.append(ase.data.chemical_symbols[int(tokens[1])])
                basis[row] = [float(x) for x in tokens[2:5]]
                if len(tokens) > 5:
                    shell_numbers[row] = float(tokens[5])  # float?
        elif tag == 'bonds':
            # Bond records are skipped, only their presence is recorded.
            for _ in range(nbond):
                f.readline()
            bonds = NotImplemented
        elif tag == 'poly':
            # Polyhedron records are skipped as well.
            for _ in range(npoly):
                f.readline()
            poly = NotImplemented
        elif tag == 'origin':
            origin = NotImplemented
        else:
            raise ValueError('Unknown tag: "%s"' % tag)

    if headline == 'asymmetric_unit_cell':
        atoms = crystal(symbols=symbols,
                        basis=basis,
                        spacegroup=spacegroup,
                        cellpar=cellpar)
    elif headline in ('full_unit_cell', 'cartesian_cell'):
        # Same construction for both; only the meaning of the coordinates
        # (scaled vs. Cartesian) differs.
        if headline == 'full_unit_cell':
            coordinates = {'scaled_positions': basis}
        else:
            coordinates = {'positions': basis}
        atoms = ase.Atoms(symbols=symbols,
                          cell=cellpar_to_cell(cellpar),
                          **coordinates)
        atoms.info['spacegroup'] = Spacegroup(spacegroup)
    else:
        raise ValueError('Invalid JSV file type: "%s"' % headline)

    atoms.info['title'] = title
    atoms.info['labels'] = labels
    # Optional entries are only stored when the file provided them.
    optional = (('bonds', bonds),
                ('poly', poly),
                ('origin', origin),
                ('shell_numbers', shell_numbers))
    for key, value in optional:
        if value is not None:
            atoms.info[key] = value
    return atoms
def __getitem__(self, i=-1):
    """Read one frame (or a slice of frames) as ``ase.Atoms``.

    Slices produce a list of frames.  An index that is not within
    ``0 <= i < len`` is offset by the trajectory length once, which makes
    negative indices count from the end; an index that is still invalid
    closes the file and raises ``IndexError``.
    """
    self._open()

    if isinstance(i, slice):
        selected = range(*i.indices(self._len()))
        return [self[k] for k in selected]

    total = self._len()
    if not 0 <= i < total:
        i = total + i
        if not 0 <= i < total:
            self._close()
            raise IndexError('Trajectory index out of range.')
        return self[i]

    nc_vars = self.nc.variables

    # Directions with a vanishing stored cell length are non-periodic.
    box_lengths = np.array(nc_vars[self._cell_lengths_var][i][:])
    pbc = np.abs(box_lengths > 1e-6)

    # Use the stored cell origin if the file has one.
    has_origin = self._has_variable(self._cell_origin_var)
    origin = (np.array(nc_vars[self._cell_origin_var][i][:])
              if has_origin else np.zeros([3], dtype=float))

    # Use the stored atom index (shifted by the offset) if available.
    if self._has_variable(self.index_var):
        atom_index = np.array(nc_vars[self.index_var][i][:]) \
            + self.index_offset
    else:
        atom_index = np.arange(self.n_atoms)

    # Element numbers fall back to ones when not stored in the file.
    self.numbers = self._get_data(self._numbers_var, i, exc=False)
    if self.numbers is not None:
        self.numbers = np.array(self.numbers[atom_index])
    else:
        self.numbers = np.ones(self.n_atoms, dtype=int)
    if self.types_to_numbers is not None:
        self.numbers = self.types_to_numbers[self.numbers]
    self.masses = atomic_masses[self.numbers]

    positions = np.array(
        self._get_data(self._positions_var, i)[atom_index])

    # Shrink-wrap the cell along every non-periodic direction.
    for d in np.arange(3)[np.logical_not(pbc)]:
        lowest = positions[:, d].min()
        origin[d] = lowest
        box_lengths[d] = positions[:, d].max() - origin[d]

    # Cell parameters are the three lengths followed by the stored angles.
    cellpar = list(box_lengths) + list(nc_vars[self._cell_angles_var][i])
    cell = cellpar_to_cell(cellpar)

    # Momenta are velocities times masses, if velocities are present.
    momenta = self._get_data(self._velocities_var, i, exc=False)
    if momenta is not None:
        momenta = momenta[atom_index] * self.masses.reshape(-1, 1)

    # Additional per-frame attributes go into ``info``.
    info = {}
    for att in self.extra_per_frame_atts:
        info[att] = np.array(nc_vars[att][i])

    atoms_kwargs = dict(
        positions=positions - origin.reshape(1, -1),
        numbers=self.numbers,
        cell=cell,
        momenta=momenta,
        masses=self.masses,
        pbc=pbc,
        info=info,
    )
    atoms = ase.Atoms(**atoms_kwargs)

    # Additional arrays found in the file are attached to the atoms.
    for var in self.extra_per_frame_vars:
        atoms.set_array(var, nc_vars[var][i][atom_index])
    for var in self.extra_per_file_vars:
        atoms.set_array(var, nc_vars[var][:])

    self._close()
    return atoms
def read_v_sim(filename='demo.ascii'):
    """Import V_Sim input file.

    Reads cell, atom positions, etc. from v_sim ascii file

    Parameters:

    filename : str | file-like object
        Path of the file or an already open text file.  In both cases
        the file is closed before this function returns, also when
        parsing fails.

    Returns an ``Atoms`` object.

    Raises ``NotImplementedError`` if the file declares the ``surface``
    or ``freeBC`` keyword.
    """
    from ase import Atoms, units
    from ase.geometry import cellpar_to_cell
    import re

    bohr_keywords = ("bohr", "bohrd0", "atomic", "atomicd0")

    def length_unit(seen_keywords):
        # Lengths are scaled by units.Bohr as soon as any of the
        # atomic-unit keywords has been encountered.
        if any(key in seen_keywords for key in bohr_keywords):
            return units.Bohr
        return 1.0

    if isinstance(filename, str):
        f = open(filename)
    else:  # Assume it's a file-like object
        f = filename

    keywords = []
    positions = []
    symbols = []

    # Raw strings: '\s' and '\S' are not valid string escape sequences.
    re_comment = re.compile(r'^\s*[#!]')
    re_node = re.compile(r'^\s*\S+\s+\S+\s+\S+\s+\S+')

    try:
        # Read comment:
        f.readline()

        # The box definition is spread over the next two lines.
        box = [float(value)
               for value in (f.readline() + ' ' + f.readline()).split()]

        for line in iter(f.readline, ''):  # '' signals EOF
            comment = re_comment.match(line)
            if comment is not None:
                # remove comment character at the beginning of line
                line = line[comment.end():].replace(',', ' ').lower()
                if line[:8] == "keyword:":
                    keywords.extend(line[8:].split())
            elif re_node.match(line):
                # The unit depends on the keywords seen *so far*.
                if "reduced" in keywords:
                    unit = 1.0
                else:
                    unit = length_unit(keywords)
                fields = line.split()
                positions.append([unit * float(fields[0]),
                                  unit * float(fields[1]),
                                  unit * float(fields[2])])
                symbols.append(fields[3])
    finally:
        # Always release the handle, even if a line could not be parsed.
        f.close()

    # Keywords are stored lower-cased (see above), so the free boundary
    # condition keyword has to be compared in lower case as well.
    if ("surface" in keywords) or ("freebc" in keywords):
        raise NotImplementedError

    # create atoms object based on the information
    if "angdeg" in keywords:
        # NOTE(review): the box is passed on unscaled here even when an
        # atomic-unit keyword is present -- confirm this is intended.
        cell = cellpar_to_cell(box)
    else:
        unit = length_unit(keywords)
        # Six box values form the lower triangle of the cell matrix.
        cell = [[unit * box[0], 0.0, 0.0],
                [unit * box[1], unit * box[2], 0.0],
                [unit * box[3], unit * box[4], unit * box[5]]]

    if "reduced" in keywords:
        atoms = Atoms(cell=cell, scaled_positions=positions)
    else:
        atoms = Atoms(cell=cell, positions=positions)

    atoms.set_chemical_symbols(symbols)
    return atoms
# Optional command line arguments:
#   argv[12:15]  shift vector stored in ``deltapos``
#   argv[15:21]  cell parameters a, b, c, alpha, beta, gamma
#   argv[21]     flag stored in ``allH``
allH = False
if len(sys.argv) >= 21:
    for axis in range(3):
        deltapos[axis] = float(sys.argv[12 + axis])
    cella, cellb, cellc, alpha, beta, gamma = (
        float(value) for value in sys.argv[15:21])
if len(sys.argv) >= 22:
    # bool() of a string is True for every non-empty string, including
    # "False" and "0".  Treat the usual negative spellings as False and
    # keep every other non-empty value True as before.
    allH = sys.argv[21].strip().lower() not in (
        '', '0', 'false', 'f', 'no', 'n', 'off')

# set unit cell
cell = geometry.cellpar_to_cell([cella, cellb, cellc, alpha, beta, gamma])
invcell = inv(cell)
print(cell)
print(invcell)

# Set centers for wrap_positions
# for when you want to specify how to wrap atoms which are
# positioned exactly on the unit cell edge.
# This is useful when, for example, node2 is shifted (1,0,0)
# and node1 has x position that lies right on UC boundary,
# so you want it to wrap to the higher value (e.g., BAZJET).
centercorrection = np.identity(3) * 0.00001
center1 = np.full(DIM, 0.5) + np.dot(centercorrection, deltapos)
#center2 = np.full(DIM,0.5) - np.dot(centercorrection,deltapos)

# Wrap node positions so they start in the same unit cell
def __getitem__(self, i=-1):
    """Return frame ``i`` of the trajectory as an ``ase.Atoms`` object.

    A slice returns a list of frames.  An index outside ``0 <= i < len``
    is shifted by the number of frames once (negative indices count from
    the end); if it is still out of range the file is closed and
    ``IndexError`` is raised.  The cell origin is handed to ``Atoms`` as
    ``celldisp``; positions are passed on as read.
    """
    self._open()

    if isinstance(i, slice):
        frame_ids = range(*i.indices(self._len()))
        return [self[frame] for frame in frame_ids]

    n_frames = self._len()
    if not 0 <= i < n_frames:
        i = n_frames + i
        if not 0 <= i < n_frames:
            self._close()
            raise IndexError('Trajectory index out of range.')
        return self[i]

    # Non-periodic boundaries have cell_length == 0.0
    lengths = np.array(self.nc.variables[self._cell_lengths_var][i][:])
    pbc = np.abs(lengths > 1e-6)

    # Cell origin: from the file when present, otherwise zero.
    if self._has_variable(self._cell_origin_var):
        origin = np.array(self.nc.variables[self._cell_origin_var][i][:])
    else:
        origin = np.zeros([3], dtype=float)

    # Turn a stored (possibly non-consecutive) atom index into its rank
    # 0..n_atoms-1; without an index variable the identity order is used.
    use_index = (self.index_var is not None
                 and self._has_variable(self.index_var))
    if use_index:
        stored = np.array(self.nc.variables[self.index_var][i][:])
        order = np.zeros_like(stored)
        order[np.argsort(stored)] = np.arange(self.n_atoms)
    else:
        order = np.arange(self.n_atoms)

    # Element numbers; default to all ones when the file stores none.
    self.numbers = self._get_data(self._numbers_var, i, order, exc=False)
    if self.numbers is None:
        self.numbers = np.ones(self.n_atoms, dtype=int)
    if self.types_to_numbers is not None:
        # Types without an explicit mapping are mapped onto themselves.
        unmapped = set(self.numbers).difference(
            self.types_to_numbers.keys())
        if unmapped:
            self.types_to_numbers.update(
                {kind: kind for kind in unmapped})
        translate = np.vectorize(self.types_to_numbers.get)
        self.numbers = translate(self.numbers)
    self.masses = atomic_masses[self.numbers]

    positions = self._get_data(self._positions_var, i, order)

    # Determine cell size for non-periodic directions from shrink
    # wrapped cell.
    for axis in np.arange(3)[np.logical_not(pbc)]:
        column = positions[:, axis]
        origin[axis] = column.min()
        lengths[axis] = column.max() - origin[axis]

    # Build the cell from three lengths followed by the stored angles.
    angles = self.nc.variables[self._cell_angles_var][i]
    cell = cellpar_to_cell(list(lengths) + list(angles))

    # Velocities, when present, are scaled in place by the masses.
    momenta = self._get_data(self._velocities_var, i, order, exc=False)
    if momenta is not None:
        momenta *= self.masses.reshape(-1, 1)

    # Extra per-frame attributes end up in the info dictionary.
    info = {name: np.array(self.nc.variables[name][i])
            for name in self.extra_per_frame_atts}

    atoms = ase.Atoms(positions=positions,
                      numbers=self.numbers,
                      cell=cell,
                      celldisp=origin,
                      momenta=momenta,
                      masses=self.masses,
                      pbc=pbc,
                      info=info)

    # Attach additional arrays found in the NetCDF file; both groups are
    # read through _get_data with the same frame and ordering.
    for name in self.extra_per_frame_vars:
        atoms.set_array(name, self._get_data(name, i, order))
    for name in self.extra_per_file_vars:
        atoms.set_array(name, self._get_data(name, i, order))

    self._close()
    return atoms
def read_v_sim(filename='demo.ascii'):
    """Import V_Sim input file.

    Reads cell, atom positions, etc. from v_sim ascii file

    Parameters:

    filename : str | file-like object
        Path of the file or an already open text file.  In both cases
        the file is closed before this function returns, also when
        parsing fails.

    Returns an ``Atoms`` object.

    Raises ``NotImplementedError`` if the file declares the ``surface``
    or ``freeBC`` keyword.
    """
    from ase import Atoms, units
    from ase.geometry import cellpar_to_cell
    import re

    atomic_unit_keywords = ("bohr", "bohrd0", "atomic", "atomicd0")

    def scale_for(seen_keywords):
        # Lengths are scaled by units.Bohr as soon as any of the
        # atomic-unit keywords has been encountered.
        for key in atomic_unit_keywords:
            if key in seen_keywords:
                return units.Bohr
        return 1.0

    if isinstance(filename, str):
        f = open(filename)
    else:  # Assume it's a file-like object
        f = filename

    keywords = []
    positions = []
    symbols = []

    re_comment = re.compile(r'^\s*[#!]')
    re_node = re.compile(r'^\s*\S+\s+\S+\s+\S+\s+\S+')

    try:
        # Read comment:
        f.readline()

        # The box definition is spread over the next two lines.
        box_line = f.readline() + ' ' + f.readline()
        box = [float(value) for value in box_line.split()]

        while True:
            line = f.readline()
            if line == '':
                break  # EOF
            comment = re_comment.match(line)
            if comment is not None:
                # remove comment character at the beginning of line
                line = line[comment.end():].replace(',', ' ').lower()
                if line[:8] == "keyword:":
                    keywords.extend(line[8:].split())
                continue
            if not re_node.match(line):
                continue
            # The unit depends on the keywords seen *so far*.
            unit = 1.0 if "reduced" in keywords else scale_for(keywords)
            fields = line.split()
            positions.append([unit * float(fields[0]),
                              unit * float(fields[1]),
                              unit * float(fields[2])])
            symbols.append(fields[3])
    finally:
        # Always release the handle, even if a line could not be parsed.
        f.close()

    # Keywords are stored lower-cased (see above), so the free boundary
    # condition keyword has to be compared in lower case as well.
    if ("surface" in keywords) or ("freebc" in keywords):
        raise NotImplementedError

    # create atoms object based on the information
    if "angdeg" in keywords:
        # NOTE(review): the box is passed on unscaled here even when an
        # atomic-unit keyword is present -- confirm this is intended.
        cell = cellpar_to_cell(box)
    else:
        unit = scale_for(keywords)
        # Six box values form the lower triangle of the cell matrix.
        cell = [[unit * box[0], 0.0, 0.0],
                [unit * box[1], unit * box[2], 0.0],
                [unit * box[3], unit * box[4], unit * box[5]]]

    if "reduced" in keywords:
        atoms = Atoms(cell=cell, scaled_positions=positions)
    else:
        atoms = Atoms(cell=cell, positions=positions)

    atoms.set_chemical_symbols(symbols)
    return atoms
def fix_cell(cell, eps=5e-3):
    """Snap almost-right cell angles to exactly 90.

    The cell is converted with ``cell_to_cellpar``; each of the entries
    3, 4 and 5 whose value, rescaled so that 90 corresponds to pi/2,
    differs from pi/2 by less than ``eps`` is set to 90.0.  The cell is
    then rebuilt with ``cellpar_to_cell`` and returned.
    """
    params = cell_to_cellpar(cell)
    right_angle = np.pi / 2
    for position in range(3, 6):
        deviation = params[position] / 90.0 * np.pi / 2 - right_angle
        if abs(deviation) < eps:
            params[position] = 90.0
    return cellpar_to_cell(params)
def niggli_reduce_cell(cell, epsfactor=None):
    """Niggli-reduce a cell.

    Parameters:

    cell : 3x3 array-like
        Cell vectors, one per row.
    epsfactor : float | None
        Relative tolerance; defaults to 1e-5.  The absolute tolerance is
        ``epsfactor`` times the cube root of the absolute cell volume.

    Returns a tuple ``(newcell, C)``: the reduced cell, rebuilt from the
    reduced lengths and angles via ``cellpar_to_cell``, and the integer
    3x3 matrix accumulating the applied operations.

    Raises ``RuntimeError`` if the reduction has not finished after
    10000 iterations.
    """
    from ase.geometry import cellpar_to_cell

    if epsfactor is None:
        epsfactor = 1e-5
    eps = epsfactor * abs(np.linalg.det(cell))**(1. / 3.)

    cell = np.asarray(cell)

    I3 = np.eye(3, dtype=int)
    I6 = np.eye(6, dtype=int)

    # C accumulates the operation on the cell vectors, D the corresponding
    # operation on the six metric components below.
    C = I3.copy()
    D = I6.copy()

    # Metric components of the input cell:
    # (a.a, b.b, c.c, 2 b.c, 2 a.c, 2 a.b)
    g0 = np.zeros(6, dtype=float)
    g0[0] = np.dot(cell[0], cell[0])
    g0[1] = np.dot(cell[1], cell[1])
    g0[2] = np.dot(cell[2], cell[2])
    g0[3] = 2 * np.dot(cell[1], cell[2])
    g0[4] = 2 * np.dot(cell[0], cell[2])
    g0[5] = 2 * np.dot(cell[0], cell[1])

    g = np.dot(D, g0)

    # Comparisons with tolerance eps.
    def lt(x, y, eps=eps):
        return x < y - eps

    def gt(x, y, eps=eps):
        return lt(y, x, eps)

    def eq(x, y, eps=eps):
        return not (lt(x, y, eps) or gt(x, y, eps))

    for _ in range(10000):
        # Swap the first two / last two vectors until the ordering
        # conditions hold; each swap restarts the iteration.
        if (gt(g[0], g[1])
                or (eq(g[0], g[1]) and gt(abs(g[3]), abs(g[4])))):
            C = np.dot(C, -I3[[1, 0, 2]])
            D = np.dot(I6[[1, 0, 2, 4, 3, 5]], D)
            g = np.dot(D, g0)
            continue
        elif (gt(g[1], g[2])
                or (eq(g[1], g[2]) and gt(abs(g[4]), abs(g[5])))):
            C = np.dot(C, -I3[[0, 2, 1]])
            D = np.dot(I6[[0, 2, 1, 3, 5, 4]], D)
            g = np.dot(D, g0)
            continue

        # Signs (+1, 0, -1) of the three off-diagonal components.
        lmn = np.array(gt(g[3:], 0, eps=eps / 2), dtype=int)
        lmn -= np.array(lt(g[3:], 0, eps=eps / 2), dtype=int)

        # Choose sign flips ijk of the three vectors based on lmn.
        if lmn.prod() == 1:
            ijk = lmn.copy()
            for idx in range(3):
                if ijk[idx] == 0:
                    ijk[idx] = 1
        else:
            ijk = np.ones(3, dtype=int)
            if np.any(lmn != -1):
                r = None
                for idx in range(3):
                    if lmn[idx] == 1:
                        ijk[idx] = -1
                    elif lmn[idx] == 0:
                        r = idx
                if ijk.prod() == -1:
                    ijk[r] = -1

        C *= ijk[np.newaxis]

        D[3] *= ijk[1] * ijk[2]
        D[4] *= ijk[0] * ijk[2]
        D[5] *= ijk[0] * ijk[1]
        g = np.dot(D, g0)

        # Remaining reduction steps; if none applies, we are done.
        if (gt(abs(g[3]), g[1])
                or (eq(g[3], g[1]) and lt(2 * g[4], g[5]))
                or (eq(g[3], -g[1]) and lt(g[5], 0))):
            # Builtin int: the ``np.int`` alias was removed from NumPy.
            s = int(np.sign(g[3]))

            A = I3.copy()
            A[1, 2] = -s
            C = np.dot(C, A)

            B = I6.copy()
            B[2, 1] = 1
            B[2, 3] = -s
            B[3, 1] = -2 * s
            B[4, 5] = -s
            D = np.dot(B, D)
            g = np.dot(D, g0)
        elif (gt(abs(g[4]), g[0])
                or (eq(g[4], g[0]) and lt(2 * g[3], g[5]))
                or (eq(g[4], -g[0]) and lt(g[5], 0))):
            s = int(np.sign(g[4]))

            A = I3.copy()
            A[0, 2] = -s
            C = np.dot(C, A)

            B = I6.copy()
            B[2, 0] = 1
            B[2, 4] = -s
            B[3, 5] = -s
            B[4, 0] = -2 * s
            D = np.dot(B, D)
            g = np.dot(D, g0)
        elif (gt(abs(g[5]), g[0])
                or (eq(g[5], g[0]) and lt(2 * g[3], g[4]))
                or (eq(g[5], -g[0]) and lt(g[4], 0))):
            s = int(np.sign(g[5]))

            A = I3.copy()
            A[0, 1] = -s
            C = np.dot(C, A)

            B = I6.copy()
            B[1, 0] = 1
            B[1, 5] = -s
            B[3, 4] = -s
            B[5, 0] = -2 * s
            D = np.dot(B, D)
            g = np.dot(D, g0)
        elif (lt(g[[0, 1, 3, 4, 5]].sum(), 0)
                or (eq(g[[0, 1, 3, 4, 5]].sum(), 0)
                    and gt(2 * (g[0] + g[4]) + g[5], 0))):
            A = I3.copy()
            A[:, 2] = 1
            C = np.dot(C, A)

            B = I6.copy()
            B[2, :] = 1
            B[3, 1] = 2
            B[3, 5] = 1
            B[4, 0] = 2
            B[4, 5] = 1
            D = np.dot(B, D)
            g = np.dot(D, g0)
        else:
            break
    else:
        raise RuntimeError('Niggli reduction not done in 10000 steps!\n'
                           'cell={}\n'
                           'operation={}'
                           .format(cell.tolist(), C.tolist()))

    # Convert the reduced metric back to lengths and angles (degrees).
    abc = np.sqrt(g[:3])
    # Prevent division by zero e.g. for cell==zeros((3, 3)):
    abcprod = max(abc.prod(), 1e-100)
    cosangles = abc * g[3:] / (2 * abcprod)
    angles = 180 * np.arccos(cosangles) / np.pi
    newcell = np.array(cellpar_to_cell(np.concatenate([abc, angles])),
                       dtype=float)

    return newcell, C
def read_proteindatabank(fileobj, index=-1, read_arrays=True):
    """Read PDB files.

    Parameters:

    fileobj : str | file-like object
        Path of the PDB file or an object providing ``readlines()``.
        A file opened here from a path is closed again before parsing;
        a file object passed in by the caller is left open.
    index : int | slice
        Selects which configuration(s) of the list of images to return.
    read_arrays : bool
        If true, occupancy and bfactor are attached as per-atom arrays
        when their length matches the number of atoms.

    Returns ``images[index]`` where ``images`` holds one ``Atoms`` object
    per configuration (a configuration ends at a line starting with
    'END'; a file without such a line yields a single configuration).
    Atom records that cannot be parsed are discarded with a warning.
    """
    # NOTE(review): `basestring` has to be supplied by the surrounding
    # module (it is not a Python 3 builtin) -- confirm.
    opened_here = isinstance(fileobj, basestring)
    if opened_here:
        fileobj = open(fileobj)
    try:
        lines = fileobj.readlines()
    finally:
        # Only release handles this function created itself.
        if opened_here:
            fileobj.close()

    def attach_arrays(atoms, occ, bfactor):
        # Attach the optional per-atom arrays when they are complete.
        if read_arrays and len(occ) == len(atoms):
            atoms.set_array('occupancy', np.array(occ))
        if read_arrays and len(bfactor) == len(atoms):
            atoms.set_array('bfactor', np.array(bfactor))

    images = []
    orig = np.identity(3)
    trans = np.zeros(3)
    atoms = Atoms()
    occ = []
    bfactor = []
    for line in lines:
        if line.startswith('CRYST1'):
            cellpar = [float(line[6:15]),  # a
                       float(line[15:24]),  # b
                       float(line[24:33]),  # c
                       float(line[33:40]),  # alpha
                       float(line[40:47]),  # beta
                       float(line[47:54])]  # gamma
            atoms.set_cell(cellpar_to_cell(cellpar))
            atoms.pbc = True
        for c in range(3):
            if line.startswith('ORIGX' + '123'[c]):
                orig[c] = [float(line[10:20]),
                           float(line[20:30]),
                           float(line[30:40])]
                trans[c] = float(line[45:55])

        if line.startswith(('ATOM', 'HETATM')):
            try:
                # Atom name is arbitrary and does not necessarily
                # contain the element symbol.  The specification
                # requires the element symbol to be in columns 77+78.
                # Fall back to Atom name for files that do not follow
                # the spec, e.g. packmol.
                try:
                    symbol = label_to_symbol(line[76:78].strip())
                except (KeyError, IndexError):
                    symbol = label_to_symbol(line[12:16].strip())
                # Don't use split() in case there are no spaces
                position = np.array([float(line[30:38]),  # x
                                     float(line[38:46]),  # y
                                     float(line[46:54])])  # z
                # Occupancy and B-factor columns are optional.
                try:
                    occ.append(float(line[54:60]))
                    bfactor.append(float(line[60:66]))
                except (IndexError, ValueError):
                    pass
                position = np.dot(orig, position) + trans
                atoms.append(Atom(symbol, position))
            except Exception as ex:
                # Deliberately best-effort: a broken atom record is
                # reported and skipped instead of aborting the read.
                warnings.warn('Discarding atom when reading PDB file: {}\n{}'
                              .format(line.strip(), ex))
        if line.startswith('END'):
            # End of configuration reached
            # According to the latest PDB file format (v3.30),
            # this line should start with 'ENDMDL' (not 'END'),
            # but in this way PDB trajectories from e.g. CP2K
            # are supported (also VMD supports this format).
            attach_arrays(atoms, occ, bfactor)
            images.append(atoms)
            atoms = Atoms()
            occ = []
            bfactor = []
    if len(images) == 0:
        # Single configuration with no 'END' or 'ENDMDL'
        attach_arrays(atoms, occ, bfactor)
        images.append(atoms)
    return images[index]