def analyse_bonds(model, A, B):
    '''
    Print a one-row summary table of the A-B bond distances in the model.

    The table reports the number of bonds in the first image together with
    the average, minimum and maximum of the retrieved distance values.

    model: Atoms object or string. If string it will read a file
        in the same folder, e.g. "name.traj"
    A: string, chemical symbol, e.g. "H"
    B: string, chemical symbol, e.g. "H"
    '''
    # A string is treated as a file name and loaded first
    if isinstance(model, str):
        model = read(model)

    analyser = Analysis(model)

    # Index pairs of the bonds, then the matching distances
    bond_indices = analyser.get_bonds(A, B)
    distances = analyser.get_values(bond_indices)

    # Title framed by two horizontal rules
    rule = "-" * 40
    title = "-".join((A, B)) + " Distance / Angstrom"
    for line in (rule, title, rule):
        print(line)

    header_fmt = '{:<6.5s}{:>4.10s}{:^13.10s}{:>4.10s}'
    print(header_fmt.format("count", "average", "minimum", "maximum"))

    row_fmt = '{:<6.0f}{:>4.6f}{:^12.6f}{:>4.6f}'
    print(row_fmt.format(
        len(distances[0]),
        np.average(distances),
        np.amin(distances),
        np.amax(distances)))
def analyse_angles(model, A, B, C):
    '''
    Print a one-row summary table of the A-B-C angles in the model.

    The table reports the number of angles found in the first image and
    the average, minimum and maximum of the retrieved angle values.

    model: Atoms object or string. If string it will read a file
        in the same folder, e.g. "name.traj"
    A: string, chemical symbol, e.g. "O"
    B: string, chemical symbol, e.g. "C"
    C: string, chemical symbol, e.g. "O"
    '''
    # A string is treated as a file name and loaded first
    if isinstance(model, str):
        model = read(model)

    analyser = Analysis(model)

    # Index triplets of the angles, then the matching values
    angle_indices = analyser.get_angles(A, B, C)
    angle_values = analyser.get_values(angle_indices)

    # Title framed by two horizontal rules
    rule = "-" * 40
    title = "-".join((A, B, C)) + " Angle / Degrees"
    for line in (rule, title, rule):
        print(line)

    header_fmt = '{:<6.5s}{:>4.10s}{:^13.10s}{:>4.10s}'
    print(header_fmt.format("count", "average", "minimum", "maximum"))

    row_fmt = '{:<6.0f}{:>4.4f}{:^12.4f}{:>4.4f}'
    print(row_fmt.format(
        len(angle_indices[0]),
        np.average(angle_values),
        np.amin(angle_values),
        np.amax(angle_values)))
def get_rdf_list(pos, r, nbin, frames, elements):
    """
    Compute the radial distribution function of the leading snapshots.

    pos: a list of atoms objects
    r: the radial length
    nbin: the number of bins in the radial range
    frames: how many entries of pos (counted from the start) to consider
    elements: the atom pair

    Returns whatever Analysis.get_rdf returns for the selected images.
    """
    analyser = Analysis(pos)
    # Select images 0 .. frames-1
    frame_window = slice(0, frames, 1)
    return analyser.get_rdf(r, nbin, imageIdx=frame_window,
                            elements=elements)
def search_abnormal_bonds(model):
    '''
    Report every bond in the model that is shorter than 0.74 Angstrom.

    Prints the number of such bonds and their "A-B" labels, or "OK" when
    none is found.

    model: Atoms object or string. If string it will read a file
        in the same folder, e.g. "name.traj"
    '''
    # A-B and B-A describe the same bond, so unordered pairs are enough
    from itertools import combinations_with_replacement

    if isinstance(model, str):
        model = read(model)

    analyser = Analysis(model)
    # set() removes duplicate chemical symbols
    species = list(set(model.get_chemical_symbols()))

    # One "A-B" label per offending bond
    offenders = []
    for first, second in combinations_with_replacement(species, 2):
        pair_bonds = analyser.get_bonds(first, second)
        # Nothing to measure when this pair has no bonds
        if pair_bonds == [[]]:
            continue
        label = first + '-' + second
        for image_values in analyser.get_values(pair_bonds):
            offenders.extend(
                label for length in image_values if length < 0.74)

    if offenders:
        print("A total of", len(offenders),
              "abnormal bond lengths observed (<0.74 A).")
        print("Identities:", offenders)
    else:
        print("OK")
def get_angles(cluster, mult=1, excluded_index=None, excluded_pair=None):
    """
    Collect the three-body index triples of ``cluster`` without repeats.

    A neighbour list is built from ``natural_cutoffs(cluster, mult=mult)``
    and the triples are read from ``Analysis(...).all_angles[0]``. A triple
    is kept only if no permutation of it has been stored already.

    #TODO: consider combining get_bonds and get_angles function

    :param cluster: structure handed to natural_cutoffs / NeighborList /
        Analysis
    :param mult: multiplier forwarded to natural_cutoffs
    :param excluded_index: atomic indices; any triple containing one of
        them is dropped from the shortened list
    :param excluded_pair: list of [particle1, particle2] lists; any triple
        containing such an ordered pair (in either order) is dropped
    :return: full angle list, shortened list
    """
    banned_indices = [] if excluded_index is None else excluded_index
    banned_pairs = [] if excluded_pair is None else excluded_pair

    neighbours = NeighborList(natural_cutoffs(cluster, mult=mult),
                              bothways=True,
                              self_interaction=False)
    neighbours.update(cluster)

    angle_list = []
    per_atom = Analysis(cluster, nl=neighbours).all_angles[0]
    for centre, partners in enumerate(per_atom):
        for partner in partners:
            triple = [centre, partner[0], partner[1]]
            already_seen = any(
                list(ordering) in angle_list
                for ordering in permutations(triple))
            if not already_seen:
                angle_list.append(triple)

    def _is_allowed(triple):
        # Reject triples touching a banned atom or a banned pair
        if any(atom in triple for atom in banned_indices):
            return False
        return not any(
            list(couple) in banned_pairs
            for couple in permutations(triple, 2))

    shortened_list = [triple for triple in angle_list if _is_allowed(triple)]
    return angle_list, shortened_list
def get_bonds(cluster, mult=1, excluded_index=None, excluded_pair=None):
    """
    Collect the bonded index pairs of ``cluster`` without repeats.

    The pairs are read from ``Analysis(...).all_bonds[0]`` using a
    neighbour list built from ``natural_cutoffs(cluster, mult=mult)``;
    a pair is stored once regardless of its order. A second, filtered
    list leaves out user-specified pairs and pairs touching user-specified
    atoms.

    :param cluster: structure handed to natural_cutoffs / NeighborList /
        Analysis
    :param mult: multiplier forwarded to natural_cutoffs
    :param excluded_index: list of integers
    :param excluded_pair: list of lists
    :return: full bonding list, shortened list.
        If both excluded_index and excluded_pair are None,
        bonding list == shortened list
    """
    banned_indices = [] if excluded_index is None else excluded_index
    banned_pairs = [] if excluded_pair is None else excluded_pair

    neighbours = NeighborList(natural_cutoffs(cluster, mult=mult),
                              bothways=True,
                              self_interaction=False)
    neighbours.update(cluster)

    bond_list = []
    per_atom = Analysis(cluster, nl=neighbours).all_bonds[0]
    for atom, partners in enumerate(per_atom):
        for partner in partners:
            forward, backward = [atom, partner], [partner, atom]
            if not (forward in bond_list or backward in bond_list):
                bond_list.append(forward)

    shortened_list = []
    for bond in bond_list:
        touches_banned_atom = any(atom in bond for atom in banned_indices)
        matches_banned_pair = any(
            tuple(bond) in list(permutations(pair))
            for pair in banned_pairs)
        if not touches_banned_atom and not matches_banned_pair:
            shortened_list.append(bond)

    return bond_list, shortened_list
def analyse_all_angles(model):
    '''
    Print a table of bond angle statistics for the supplied model.

    One row is printed for every ordered triple of chemical symbols for
    which angles are found: label, count in the first image, average,
    minimum and maximum.

    Parameters:

    model: Atoms object or string. If string it will read a file
        in the same folder, e.g. "name.traj"
    '''
    # Every ordered arrangement of three symbols is tried
    from itertools import product

    if isinstance(model, str):
        model = read(model)

    analyser = Analysis(model)
    # set() removes duplicate chemical symbols
    species = list(set(model.get_chemical_symbols()))

    # Table heading
    rule = "-" * 40
    print(rule)
    print('{:<9.8s}{:<6.5s}{:>4.10s}{:^13.10s}{:>4.10s}'.format(
        "Angle", "Count", "Average", "Minimum", "Maximum"))
    print(rule)

    row_fmt = '{:<9.8s}{:<6.0f}{:>4.4f}{:^12.4f}{:>4.4f}'
    for A, B, C in product(species, repeat=3):
        triplets = analyser.get_angles(A, B, C)
        # Skip arrangements that do not occur in the model
        if triplets == [[]]:
            continue
        values = analyser.get_values(triplets)
        print(row_fmt.format(
            A + '-' + B + '-' + C,
            len(triplets[0]),
            np.average(values),
            np.amin(values),
            np.amax(values)))
def analyse_all_bonds(model):
    '''
    Print a table of bond distance statistics for the supplied model.

    One row is printed for every unordered pair of chemical symbols for
    which bonds are found: label, count in the first image, average,
    minimum and maximum.

    Parameters:

    model: Atoms object or string. If string it will read a file
        in the same folder, e.g. "name.traj"
    '''
    # A-B and B-A describe the same bond, so unordered pairs are enough
    from itertools import combinations_with_replacement

    if isinstance(model, str):
        model = read(model)

    analyser = Analysis(model)
    # set() removes duplicate chemical symbols
    species = list(set(model.get_chemical_symbols()))

    # Table heading
    rule = "-" * 40
    print(rule)
    print('{:<6.5s}{:<6.5s}{:>4.10s}{:^13.10s}{:>4.10s}'.format(
        "Bond", "Count", "Average", "Minimum", "Maximum"))
    print(rule)

    row_fmt = '{:<8.8s}{:<6.0f}{:>4.6f}{:^12.6f}{:>4.6f}'
    for A, B in combinations_with_replacement(species, 2):
        pair_bonds = analyser.get_bonds(A, B)
        # Skip pairs that are not bonded anywhere in the model
        if pair_bonds == [[]]:
            continue
        distances = analyser.get_values(pair_bonds)
        print(row_fmt.format(
            A + '-' + B,
            len(distances[0]),
            np.average(distances),
            np.amin(distances),
            np.amax(distances)))
def get_psuedoatoms(atoms_obj, TRAP_atms, TRAP_bonds): ana_H = Analysis(atoms_obj) #Collect bond distances from ASE atoms.-Find easier way to do this! ASE_bond_dist = [] ASE_bond_atms = [] atm_bonds_list = ana_H.unique_bonds[0] for index, atm_bonds in enumerate(atm_bonds_list): for neighbor in atm_bonds: dist = np.linalg.norm(atoms_obj[index].position - atoms_obj[neighbor].position) ASE_bond_dist.append(dist) ASE_bond_atms.append([index, neighbor]) H_index = [] ASE_bond_atms_cp = ASE_bond_atms.copy() ASE_bond_dist_cp = ASE_bond_dist.copy() atoms_obj_cp = atoms_obj.copy() #If "H" is in psuedo potentials, we must include it and collect it's index. if 'H' in TRAP_atms[:, 1]: for bond in TRAP_bonds: if 'H' in bond[2].split('-'): H_bond_dist = float(bond[3]) indx, value = find_nearest(ASE_bond_dist_cp, H_bond_dist) for i in (ASE_bond_atms_cp[indx]): if atoms_obj_cp[i].symbol == 'H': H_index.append(atoms_obj_cp[i].index) ASE_bond_atms_cp.pop(indx) ASE_bond_dist_cp.pop(indx) #Determine hybridization. "atoms_C" does not strictly apply to carbon atoms. atoms_H = atoms_obj.copy() del atoms_H[[atom.index for atom in atoms_H if not atom.symbol == 'H']] atoms_C = atoms_obj.copy() del atoms_C[[ atom.index for atom in atoms_C if atom.symbol == 'H' and atom.index not in H_index ]] for i, C in enumerate(atoms_C): hybridization = 0 if C.symbol == 'C': for j, H in enumerate(atoms_H): if np.linalg.norm(C.position - H.position) < 1.2: hybridization += 1 atoms_C[i].mass += hybridization * (atoms_H[0].mass) atoms_C[i].tag = hybridization return atoms_C
def analyse_angles(model, A, B, C, verbose=True, multirow=False):
    '''
    Find the A-B-C angles present in the model and optionally print them.

    Parameters:

    model: Atoms object
        Structure handed to ase.geometry.analysis.Analysis
    A: string, chemical symbol, e.g. "O"
    B: string, chemical symbol, e.g. "C"
    C: string, chemical symbol, e.g. "O"
    verbose: Boolean
        Whether to print information to screen
    multirow: Boolean
        Whether this row is part of a larger table; when False the table
        header is printed before the row

    Returns:

    (ABC_indices, ABC_values): the angle index lists and their values;
        the values are None when the first image has no such angle.
    '''
    from ase.geometry.analysis import Analysis

    analyser = Analysis(model)

    # Angle index triplets, and their values when any exist
    ABC_indices = analyser.get_angles(A, B, C)
    has_angles = len(ABC_indices[0]) != 0
    ABC_values = analyser.get_values(ABC_indices) if has_angles else None

    # Nothing to print: silent mode or no angles
    if not verbose or ABC_values is None:
        return ABC_indices, ABC_values

    # Table header only for a stand-alone row
    if not multirow:
        print_angles_table_header()

    # Table contents
    import numpy as np
    row_fmt = '{:<9.8s}{:<6.0f}{:>4.4f}{:^12.4f}{:>4.4f}'
    print(row_fmt.format(
        A + "-" + B + "-" + C,
        len(ABC_indices[0]),
        np.average(ABC_values),
        np.amin(ABC_values),
        np.amax(ABC_values)))

    return ABC_indices, ABC_values
def analyse_bonds(model, A, B, verbose=True, multirow=False):
    '''
    Find the A-B bonds present in the model and optionally print them.

    Parameters:

    model: Atoms object
        Structure handed to ase.geometry.analysis.Analysis
    A: string, chemical symbol, e.g. "H"
    B: string, chemical symbol, e.g. "H"
    verbose: Boolean
        Whether to print information to screen
    multirow: Boolean
        Whether we are working with analyse_all_bonds, so the output is
        multirow, or just one specific analysis of a bond, in which case
        the table header is needed.

    Returns:

    (print_AB, AB_Bonds, AB_BondsValues): the "A-B" label, the bond index
        lists and the bond lengths; the lengths are None when no bond
        is found.
    '''
    from ase.geometry.analysis import Analysis

    analyser = Analysis(model)
    print_AB = "-".join((A, B))

    # Bond index pairs, and their lengths when any exist
    AB_Bonds = analyser.get_bonds(A, B)
    if AB_Bonds == [[]]:
        AB_BondsValues = None
    else:
        AB_BondsValues = analyser.get_values(AB_Bonds)

    # Nothing to print: silent mode or no bonds
    if not verbose or AB_BondsValues is None:
        return print_AB, AB_Bonds, AB_BondsValues

    # Table header only for a stand-alone row
    if not multirow:
        print_bond_table_header()

    # Table contents
    import numpy as np
    row_fmt = '{:<8.8s}{:<6.0f}{:>4.6f}{:^12.6f}{:>4.6f}'
    print(row_fmt.format(
        print_AB,
        len(AB_BondsValues[0]),
        np.average(AB_BondsValues),
        np.amin(AB_BondsValues),
        np.amax(AB_BondsValues)))

    return print_AB, AB_Bonds, AB_BondsValues
def getslab(struct):
    """
    Keep only the atoms connected to atom 0.

    Input:
        struct: structure from which we will trim unbound species
            (Atoms object)
    Output:
        baseslab: the part of struct belonging to the connected component
            (of the bonding graph) that contains atom index 0
    """
    adjacency = Analysis(struct, bothways=True).adjacency_matrix[0]
    natoms = adjacency.shape[0]

    # Mirror every non-zero adjacency entry as a graph edge
    graph = Graph(natoms)
    for row in range(natoms):
        for col in range(natoms):
            if adjacency[row, col]:
                graph.addEdge(row, col)

    components = graph.connectedComponents()
    # The component holding atom 0 is taken to be the slab
    holding_first_atom = [comp for comp in components if 0 in comp]
    slab_members = np.array(holding_first_atom[0])

    kept = [atom.index for atom in struct if atom.index in slab_members]
    return struct[kept]
def bondAnalysis(data, focusElement="C",
                 bondelems=("C", "F", "H", "Si", "N"), verbose=False):
    """
    Count, for every focusElement atom, its bonds to each of bondelems.

    `data` should be a pd Series (or any mapping with ``.items()``)
    consisting of (structure id: Atoms object) pairs.

    Length of returned values reflects only # of atoms of focusElement
    that have at least one bond to an atom in bondelems.

    Parameters:
        data: mapping of structure id -> Atoms object
        focusElement: chemical symbol of the atoms to analyse
        bondelems: chemical symbols of the bonding partners to count
        verbose: print a message for every focusElement atom that has no
            bond to any of bondelems

    Returns:
        cbonds: DataFrame indexed by (structure id, atom index) with one
            column per bond element holding the bond counts
        combos: Series mapping a Hill-ordered formula of the bonded
            partners to the number of atoms having that environment
        combolists: Series mapping the same formula to the list of
            (structure id, atom index) keys having that environment
    """
    # ``.items()`` replaces ``Series.iteritems()``, which pandas 2.0 removed
    analyses = {key: Analysis(value) for key, value in data.items()}
    cIdxs = {
        key: [atom.index for atom in value if atom.symbol == focusElement]
        for key, value in data.items()
    }

    # construct cbonds: per-atom counts of neighbours of each bond element
    cbonds = {}
    for key, lst in cIdxs.items():
        _bonds = analyses[key].all_bonds[0]
        _struct = data[key]
        for idx in lst:
            mybonds = _bonds[idx]
            mybondDict = {
                bondelem: sum(_struct[i].symbol == bondelem for i in mybonds)
                for bondelem in bondelems
            }
            if sum(mybondDict.values()) == 0:
                if verbose:
                    print("no bonds between focusElement and bondelems detected")
            else:
                cbonds[(key, idx)] = mybondDict
    cbonds = pd.DataFrame(cbonds).T

    # construct combos: group atoms by the formula of their bonded partners
    combos = {}
    combolists = {}
    for key, counts in cbonds.iterrows():
        # Repeat each symbol by its count, e.g. {"H": 2, "F": 1} -> "HHF"
        newkey = "".join(
            [elem * count for elem, count in counts.items() if count > 0])
        newkey = Formula(newkey).format('hill')
        combos[newkey] = combos.get(newkey, 0) + 1
        combolists[newkey] = combolists.get(newkey, []) + [key]

    combos = pd.Series(combos)
    combolists = pd.Series(combolists)
    return cbonds, combos, combolists
def getFragIndices(struct, check=False):
    """
    Return the atom indices of every fragment not connected to atom 0.

    Input:
        struct: structure (Atoms object)
        check: would you like to check max connectivity? When True, the
            longest bonds of over-coordinated atoms are removed from the
            adjacency matrix before the fragments are identified.
    Output:
        array of indices for fragments: one entry per connected component
        that does not contain atom 0. Components of equal size give a
        regular 2-D array; otherwise a 1-D object array of index lists.
    """
    analysis = Analysis(struct, bothways=True)
    adjmat = analysis.adjacency_matrix[0].toarray()

    if check:
        maxbonds = {'Ar': 0, 'Si': 6, 'F': 1, 'N': 4, 'H': 1, 'C': 5}
        for i, adjrow in enumerate(adjmat):
            elem = struct[i].symbol
            # +1 because adjmat[i,i] = 1
            while np.sum(adjrow) > maxbonds[elem] + 1:
                distances = {
                    atom.index: struct.get_distance(i, atom.index)
                    for atom in struct if adjrow[atom.index]
                }
                # distances is fixed for this pass, so its maximum is too
                longest = np.max(list(distances.values()))
                for j, _ in enumerate(adjrow):
                    if j in distances and distances[j] == longest:
                        adjrow[j] = 0
                        adjmat[j, i] = 0  # Delete both directions of the edge
                adjmat[i] = adjrow

    numnodes = adjmat.shape[0]  # Adjacency matrix is NxN, N = #atoms
    g = Graph(numnodes)
    for i in range(numnodes):
        for j in range(numnodes):
            if adjmat[i, j]:
                g.addEdge(i, j)
    cc = g.connectedComponents()

    fragments = [comp for comp in cc if 0 not in comp]
    try:
        return np.array(fragments)
    except ValueError:
        # NumPy >= 1.24 refuses to build an array from lists of unequal
        # length; store them explicitly as objects, which is what older
        # NumPy versions produced implicitly.
        ragged = np.empty(len(fragments), dtype=object)
        for position, comp in enumerate(fragments):
            ragged[position] = comp
        return ragged
def find_frag_perms(R, z, lat_and_inv=None, callback=None, max_processes=None):
    """
    Search for permutations that exchange identical disconnected fragments.

    The bonding graph of the first geometry ``R[0]`` is split into
    connected components (fragments). Fragments whose sorted ``z`` values
    are equal are matched against each other with ``find_perms`` and the
    resulting swaps are closed into a group with ``complete_sym_group``.

    :param R: array indexed as ``R[geometry, atom, :]``; positions must be
        in Angstrom for the bond detection to work
    :param z: array of per-atom values passed to ``Atoms`` as the symbols
        argument (presumably atomic numbers -- verify against caller)
    :param lat_and_inv: forwarded unchanged to ``find_perms`` /
        ``find_perms_via_alignment``
    :param callback: forwarded to the final ``complete_sym_group`` call
    :param max_processes: forwarded unchanged to ``find_perms`` /
        ``find_perms_via_alignment``
    :return: array of permutations after closure, or ``[range(n_atoms)]``
        when every atom ends up in its own fragment
    """
    from ase import Atoms
    from ase.geometry.analysis import Analysis
    from scipy.sparse.csgraph import connected_components

    print('Finding permutable non-bonded fragments... (assumes Ang!)')

    # TODO: positions must be in Angstrom for this to work!!

    n_train, n_atoms = R.shape[:2]

    atoms = Atoms(
        z, positions=R[0]
    )  # only use first molecule in dataset to find connected components (fix me later, maybe) # *0.529177249

    adj = Analysis(atoms).adjacency_matrix[0]
    _, labels = connected_components(csgraph=adj, directed=False, return_labels=True)

    # One index array per connected component
    frags = []
    for label in np.unique(labels):
        frags.append(np.where(labels == label)[0])
    n_frags = len(frags)

    if n_frags == n_atoms:
        print(
            'Skipping fragment symmetry search (something went wrong, e.g. length unit not in Angstroms, etc.)'
        )
        # NOTE(review): this early exit returns a list holding a range,
        # while the normal path returns an array -- confirm callers cope.
        return [range(n_atoms)]

    # print(labels)

    # from . import ui, io
    # xyz_str = io.generate_xyz_str(R[0][np.where(labels == 0)[0], :]*0.529177249, z[np.where(labels == 0)[0]])
    # xyz_str = ui.indent_str(xyz_str, 2)
    # sprint(xyz_str)

    # NEW

    # uniq_labels = np.unique(labels)
    # R_cg = np.empty((R.shape[0], len(uniq_labels), R.shape[2]))
    # z_frags = []
    # z_cg = []
    # for label in uniq_labels:
    #     frag_idxs = np.where(labels == label)[0]
    #     R_cg[:,label,:] = np.mean(R[:,frag_idxs,:], axis=1)
    #     z_frag = np.sort(z[frag_idxs])
    #     z_frag_label = 0
    #     if len(z_frags) == 0:
    #         z_frags.append(z_frag)
    #     else:
    #         z_frag_label = np.where(np.all(z_frags == z_frag, axis=1))[0]
    #         if len(z_frag_label) == 0: # not found
    #             z_frag_label = len(z_frags)
    #             z_frags.append(z_frag)
    #         else:
    #             z_frag_label = z_frag_label[0]
    #     z_cg.append(z_frag_label)
    # print(z_cg)
    # print(R_cg.shape)
    # perms = find_perms(R_cg, np.array(z_cg), lat_and_inv=lat_and_inv, max_processes=max_processes)
    # print('cg perms')
    # print(perms)

    # NEW

    # print(n_frags)

    print('| Found ' + str(n_frags) + ' disconnected fragments.')

    n_frags_unique = 0  # number of unique fragments

    # match fragments to find identical ones (allows permutations of fragments)
    # The identity permutation is always part of the result
    swap_perms = [np.arange(n_atoms)]
    for f1 in range(n_frags):
        for f2 in range(f1 + 1, n_frags):

            # Sort both fragments by z so equal compositions line up
            sort_idx_f1 = np.argsort(z[frags[f1]])
            sort_idx_f2 = np.argsort(z[frags[f2]])
            inv_sort_idx_f2 = inv_perm(sort_idx_f2)

            z1 = z[frags[f1]][sort_idx_f1]
            z2 = z[frags[f2]][sort_idx_f2]

            if np.array_equal(z1, z2):  # fragment have the same composition
                n_frags_unique += 1

                # Only the first (at most 10) geometries are used for matching
                for ri in range(
                    min(10, R.shape[0])
                ):  # only use first molecule in dataset for matching (fix me later)

                    R_match1 = R[ri, frags[f1], :]
                    R_match2 = R[ri, frags[f2], :]

                    #if np.array_equal(z1, z2):

                    R_pair = np.concatenate(
                        (R_match1[None, sort_idx_f1, :], R_match2[None, sort_idx_f2, :])
                    )

                    perms = find_perms(R_pair, z1, lat_and_inv=lat_and_inv, max_processes=max_processes)

                    # embed local permutation into global context
                    for p in perms:

                        match_perm = sort_idx_f1[p][inv_sort_idx_f2]

                        swap_perm = np.arange(n_atoms)
                        swap_perm[frags[f1]] = frags[f2][match_perm]
                        swap_perm[frags[f2][match_perm]] = frags[f1]
                        swap_perms.append(swap_perm)

    # Drop duplicate permutations
    swap_perms = np.unique(np.array(swap_perms), axis=0)

    print('| Found ' + str(n_frags_unique) + ' (likely to be) *unique* disconnected fragments.')

    # complete symmetric group
    sym_group_perms = complete_sym_group(swap_perms)
    print('| Found ' + str(sym_group_perms.shape[0]) + ' fragment permutations after closure.')

    # match fragments with themselves (to find symmetries in each fragment)
    if n_frags > 1:
        print('| Matching individual fragments.')
        for f in range(n_frags):

            R_frag = R[:, frags[f], :]
            z_frag = z[frags[f]]

            # print(R_frag.shape)
            # print(z_frag)

            # NOTE(review): the per-fragment permutations are only printed;
            # nothing in this loop feeds them into the returned result.
            print(f)

            perms = find_perms(R_frag, z_frag, lat_and_inv=lat_and_inv, max_processes=max_processes)

            # print(f)
            print(perms)

    # NOTE(review): the atom indices below are hard-coded, which looks like
    # leftover experiment code tied to one particular system -- confirm
    # before using this function on other data.
    f = 0
    perms = find_perms_via_alignment(R[0, :, :], frags[f], [215, 214, 210, 211], [209, 208, 212, 213], z, lat_and_inv=lat_and_inv, max_processes=max_processes)
    #perms = find_perms_via_alignment(R[0, :, :], frags[f], [214, 215, 210, 211], [209, 208, 212, 213], z, lat_and_inv=lat_and_inv, max_processes=max_processes)
    sym_group_perms = np.vstack((perms[None, :], sym_group_perms))
    sym_group_perms = complete_sym_group(sym_group_perms, callback=callback)

    #print(sym_group_perms.shape)

    #import sys
    #sys.exit()

    return sym_group_perms
def main(
        datadir="temp/",
        outputdir="temp.new/",
        hbondrange=6,
        zmincutoff=0.1,
        output_geom_name="output",
        output_velos_name="velos"
):
    """
    Trim unbound fragments from a set of geometries and their velocities.

    For every geometry file the slab is taken to be the connected
    component containing atom 0. A fragment is removed when all of its
    atoms lie above ``max slab z + hbondrange`` or all lie below
    ``zmincutoff``; atoms above the top of the cell are removed as well.

    Parameters:
        datadir: data files, structured as datadir/output$i-$j.gen and
            datadir/velos$i-$j (must end with a path separator, since
            file names are appended directly)
        outputdir: prefix for output files (same separator requirement)
        hbondrange: offset from surface corresponding to Hbond range
        zmincutoff: somewhat arbitrary value to get rid of atoms that
            have gone into bulk
        output_geom_name: prefix for output geometry files
        output_velos_name: prefix for output velocity files

    Writes ``slab<key>.gen``, ``input<key>.gen`` and
    ``<output_velos_name><key>.in`` under ``outputdir``, and
    ``removedspecies.csv`` in the current working directory.
    """
    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)

    ##############################
    ### Read in geometry files ###
    ##############################
    # Files are keyed by the first run of digits in their name
    geometries = {}
    for fname in os.listdir(datadir):
        if output_geom_name in fname:
            match = re.search(r"\d+", fname)
            if match:
                geometries[match.group(0)] = gen.read_gen(datadir + fname)

    ##########################
    ### Read in velocities ###
    ##########################
    velos = dict()
    for fname in os.listdir(datadir):
        if output_velos_name in fname:
            match = re.search(r"\d+", fname)
            if match:
                # Raw string: "\s" is an invalid escape in a normal literal
                velos[match.group(0)] = pd.read_csv(
                    datadir + fname, header=None, dtype=float, sep=r"\s+")

    ################
    ### trimming ###
    ################
    trimmedgeoms = dict()
    trimmedvelos = dict()
    removedspecies = dict()

    for key, geom in geometries.items():
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F': 0, 'C': 0}

        # construct graph
        adjmat = Analysis(geom).adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i, j]:
                    g.addEdge(i, j)
        cc = g.connectedComponents()

        # identify slab, and max height of slab
        maingraph = np.array([comp for comp in cc if 0 in comp][0])
        slab = geom[[atom.index for atom in geom if atom.index in maingraph]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange

        # isolate fragments and identify which to remove
        fragGraphs = [comp for comp in cc if 0 not in comp]
        fragZs = [[geom[i].position[2] for i in frag] for frag in fragGraphs]
        removeFrag = [
            np.all(np.array(zs) > zcutoff) or np.all(np.array(zs) < zmincutoff)
            for zs in fragZs
        ]
        atomsToRemove = [
            i for frag, remove in zip(fragGraphs, removeFrag) if remove
            for i in frag
        ]

        # account for any atoms that have wrapped around through the top
        # of the cell (lookin at you, H)
        atomsToRemove += [a.index for a in geom if a.z > geom.cell[2, 2]]

        # An atom can qualify through both rules above; keep each index
        # once (order preserved) so it is not tallied twice below.
        atomsToRemove = list(dict.fromkeys(atomsToRemove))
        removeSet = set(atomsToRemove)

        # tally removed atoms by species
        for idx in atomsToRemove:
            removedatoms[geom[idx].symbol] += 1

        geomcopy = geom.copy()
        del geomcopy[[atom.index for atom in geomcopy
                      if atom.index in removeSet]]

        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy
        # Row i of the velocity table belongs to atom i
        trimmedvelos[key] = velos[key][
            [i not in removeSet for i in np.arange(len(velos[key]))]]

    # collect all removed species series into a df and write as csv
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")

    # write
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), geom)
    for key, v in trimmedvelos.items():
        v.to_csv("%s%s%s.in" % (outputdir, output_velos_name, key),
                 sep=" ", index=False, header=False)
def process(self):
    """
    Convert the raw database into graph samples and save them.

    Targets are read line by line from ``self.raw_paths[1]``; structures
    come from ``OrganicMaterialsDatabase(self.raw_paths[0])``. For every
    entry that is not None a ``Data`` object is built whose node feature
    is the atomic number and whose edges are the bonds reported by
    ``Analysis(...).unique_bonds`` (stored in both directions, all
    labelled ``self.bonds["SINGLE"]``). The optional ``pre_filter`` and
    ``pre_transform`` hooks are applied before the collated result is
    saved to ``self.processed_paths[0]``.
    """
    # One float per line; the element after the final newline is dropped
    with open(self.raw_paths[1], 'r') as handle:
        raw_lines = handle.read().split('\n')[0:-1]
    target = torch.tensor([float(entry) for entry in raw_lines],
                          dtype=torch.float)

    omdData = OrganicMaterialsDatabase(self.raw_paths[0], download=False)
    print(len(omdData))

    data_list = []
    for i in range(len(omdData)):
        mol = omdData[i]
        if mol is None:
            print(str(i),' not a molecule')
            continue

        pos = mol['_positions']
        print('after constructing positions')

        atomic_number = [entry.item() for entry in mol['_atomic_numbers']]

        print('before fetching the atom properties ',str(i))
        at_obj = omdData.get_atoms(idx=i)
        print('after fetching the atom properties')

        # Atomic masses are collected but not (yet) used as a feature
        atomic_masses = list(at_obj.get_masses())

        N = len(atomic_number)
        x = torch.tensor([atomic_number], dtype=torch.float)
        x = x.t().contiguous()

        # Every bond is stored in both directions
        row, col, bond_idx = [], [], []
        bond_anal = Analysis(at_obj)
        for image_bonds in bond_anal.unique_bonds:
            for start, partners in enumerate(image_bonds):
                for end in partners:
                    row.extend((start, end))
                    col.extend((end, start))
                    bond_idx.extend(2 * [self.bonds["SINGLE"]])
        print('after constructing the bonds')

        edge_index = torch.tensor([row, col], dtype=torch.long)
        one_hot = F.one_hot(torch.tensor(bond_idx),
                            num_classes=len(self.bonds))
        edge_attr = one_hot.to(torch.float)
        edge_index, edge_attr = coalesce(edge_index, edge_attr, N, N)

        y = target[i].unsqueeze(0)
        name = str(at_obj.symbols)

        print('constructing data')
        data = Data(x=x, pos=pos, edge_index=edge_index,
                    edge_attr=edge_attr, y=y, name=name)
        print('after constructing data')

        # Optional user hooks
        if self.pre_filter is not None and not self.pre_filter(data):
            continue
        if self.pre_transform is not None:
            data = self.pre_transform(data)

        data_list.append(data)

    print('saving data')
    torch.save(self.collate(data_list), self.processed_paths[0])
# Accumulators for bond and angle data across all structures.
# NOTE(review): nothing in this fragment appends to them -- presumably
# that happens in code that is not shown here.
AllPbIBonds = []
AllSnIBonds = []
AllPbIAngles = []
AllSnIAngles = []
AllPbIPbAngles = []
AllSnISnAngles = []

# Loop over the 16 structure files CONTCAR_PbSn0 ... CONTCAR_PbSn15
for i in range(0, 16):
    # VASP POSCAR
    PEA2PbSnI4 = read('CONTCAR_PbSn' + str(i))
    #print(PEA2PbSnI4)
    ana = Analysis(PEA2PbSnI4)

    # Bonds
    PbIBonds = ana.get_bonds('Pb', 'I', unique=True)
    SnIBonds = ana.get_bonds('Sn', 'I', unique=True)
    #print("\nThere are {} Pb-I bonds in PEA2PbSnI4.".format(len(PbIBonds[0])))
    #print("There are {} Sn-I bonds in PEA2PbSnI4.".format(len(SnIBonds[0])))

    # Bond lengths are only evaluated when the structure has such bonds.
    # NOTE(review): when a structure has none, the *Values name keeps the
    # value from an earlier iteration (or stays undefined) -- confirm this
    # is handled by whatever consumes it.
    if len(PbIBonds[0]) > 0:
        PbIBondValues = ana.get_values(PbIBonds)
    if len(SnIBonds[0]) > 0:
        SnIBondValues = ana.get_values(SnIBonds)
# Compare the radial distribution functions of two structures.
crystal = read(filename)
crystal1 = read(filename1)
corrdinates = crystal.get_positions()
cell_length = crystal.get_cell_lengths_and_angles()
cell_length = cell_length[0:3]  # only select the cell length
dr = 0.01  # spherical shell radius dr
min_length_cell = min(cell_length)  # select the smallest length in cell
# The RDF range is derived from the cell of ``crystal`` only, so
# ``crystal1`` needs a cell at least as large for the same rmax to be valid.
rmax = (min_length_cell / 2) - 0.1  # 2*rmax < min_length_cell
bins = np.rint((min_length_cell / 3) * 100)
bins = bins.astype(int)

rdf = Analysis(crystal)
rdf2 = Analysis(crystal1)
g = rdf.get_rdf(rmax, bins)
# The second curve comes from the second structure; previously both
# curves were computed from ``rdf`` and ``rdf2`` was never used.
g_r = rdf2.get_rdf(rmax, bins)

r = np.linspace(0, rmax, bins)
y1 = np.array(g)
y = np.array(g_r)
y1 = np.transpose(y1)
y = np.transpose(y)
print(np.count_nonzero(y))

# black: crystal1 (filename1), blue: crystal (filename)
plt.figure()
plt.plot(r, y, color='black')
plt.plot(r, y1, color='blue')
plt.xlabel('r')
def transmute(threshold, transmute=True, target="C", slabElems=["Si", "N"], surfDepth=6, simple=True, numStructs=-1, numOut=-1, **kwargs):
    """
    Find undercoordinated surface/near-surface atoms and transmute them to C

    Args:
    - threshold (k): maximum number of bonds to be considered "undercoordinated" (should be 1 or 2)
    - transmute: whether to transmute elements or not (note: inside the
        function this name shadows the function itself)
    - target: chemical symbol assigned to the transmuted atoms
    - slabElems: elements to consider as part of the 'core slab'; only these will be transmuted
    - surfDepth: depth of atoms to consider below surface; surface defined as max z coord among slabElems
    - simple: Use basic bond counting as implemented in ASE, or advanced with coordLabeller from `dependencies`
        (simple=False is UNTESTED!!!). The value passed in is currently
        ignored: it is overwritten with True on the first line.
    - numStructs: Number of structures to consider
    - numOut: Number of structures to output
    - kwargs: to be passed to the readStructs function from `dependencies`

    Returns:
        DataFrame built from the structures, with an added column 'n'
        (number of flagged atoms per structure), sorted by 'n' in
        descending order and optionally cut to the first numOut rows.
        Flagged atoms carry tag 1; if ``transmute`` is true their symbol
        is set to ``target``.
    """
    simple = True  # only support simple bond count for now
    if numStructs > 0:
        data = readStructs(**kwargs)['geom'][:numStructs]
    else:
        data = readStructs(**kwargs)['geom']

    ## make bond counts
    # Only bonds between two slabElems atoms are counted: the loops start
    # from the total number of bonds and subtract every other bond.
    if simple:
        analyses = {key: Analysis(value) for key, value in data.items()}
        bonds = {key: value.all_bonds[0] for key, value in analyses.items()}
        numBonds = {
            key: [len(i) for i in value]
            for key, value in bonds.items()
        }
        for key, b in bonds.items():
            for idx, atomBonds in enumerate(b):
                for bond in atomBonds:
                    e1, e2 = data[key][idx].symbol, data[key][bond].symbol
                    if e1 not in slabElems or e2 not in slabElems:
                        numBonds[key][idx] -= 1
    else:
        # NOTE(review): this branch is unreachable while ``simple`` is
        # forced to True above. It also reads ``minz`` and ``atomBonds``
        # before anything in this function defines them, so it would fail
        # if enabled -- fix before removing the override.
        bonds = {
            key: coordLabeller(value,
                               0,
                               angle_tolerance=.25,
                               bond_tolerance=.15,
                               minz=minz)[1]
            for key, value in data.items()
        }
        numBonds = {
            key: [len(value[i]) for i in range(len(value))]
            for key, value in bonds.items()
        }
        for key, b in bonds.items():
            for idx, bond in b.items():  #dictionary
                for bond in atomBonds:
                    e1, e2 = data[key][idx].symbol, data[key][bond].symbol
                    if e1 not in slabElems or e2 not in slabElems:
                        numBonds[key][idx] -= 1

    ## look for criteria: surface depth, core slab elements, low coordination
    toReplace = {}
    for key, numBondList in numBonds.items():
        # Lower z bound of the surface region: surfDepth below the highest
        # slabElems atom
        minz = np.max([
            atom.position[2] for atom in data[key] if atom.symbol in slabElems
        ]) - surfDepth
        # 1 for atoms meeting all three criteria, 0 otherwise
        toReplace[key] = [
            int(nBonds <= threshold and data[key][i].symbol in slabElems
                and data[key][i].position[2] > minz)
            for i, nBonds in enumerate(numBondList)
        ]

    ## set labels for atoms to be transmuted, and generate counts
    numTransmuted = {}
    for key, value in data.items():
        data[key].set_tags(toReplace[key])
        data[key].wrap()
        numTransmuted[key] = np.sum(toReplace[key])

    data = pd.DataFrame(data)
    data['n'] = pd.Series(numTransmuted)
    data = data.sort_values("n", ascending=False)

    if numOut > 0:
        data = data.iloc[:numOut, :]

    ## transmute atoms
    # presumably the structures end up in a column named 'geom' (the name
    # of the series read above) -- verify against readStructs
    if transmute:
        for key in data.index:
            for atom in data['geom'][key]:
                if toReplace[key][atom.index]:
                    atom.symbol = target
    return data
#test the geometry.analysis module import numpy as np from ase.geometry.analysis import Analysis from ase.build import molecule mol = molecule('CH3CH2OH') ana = Analysis(mol) assert np.shape(ana.adjacency_matrix[0].todense()) == (9, 9) for imI in range(len(ana.all_bonds)): l1 = sum([len(x) for x in ana.all_bonds[imI]]) l2 = sum([len(x) for x in ana.unique_bonds[imI]]) assert l1 == l2 * 2 for imi in range(len(ana.all_angles)): l1 = sum([len(x) for x in ana.all_angles[imi]]) l2 = sum([len(x) for x in ana.unique_angles[imi]]) assert l1 == l2 * 2 for imi in range(len(ana.all_dihedrals)): l1 = sum([len(x) for x in ana.all_dihedrals[imi]]) l2 = sum([len(x) for x in ana.unique_dihedrals[imi]]) assert l1 == l2 * 2 assert len(ana.get_angles('C','C','H', unique=False)[0]) == len(ana.get_angles('C','C','H', unique=True)[0])*2 csixty = molecule('C60') mol = molecule('C7NH5') ana = Analysis(csixty) ana2 = Analysis(mol)
from ase.io.trajectory import Trajectory
import numpy as np
from ase.io import read
from ase.geometry.analysis import Analysis

#old file see bonds.py for better version
# Convert the xyz structure to a trajectory file and read it back
file = read('tin_acetate.xyz')
traj = Trajectory('tin_acetate.traj', 'w')
traj.write(file)
file = read('tin_acetate.traj')

ana = Analysis(file)

# The variable names are kept from the Si-O example this script was
# adapted from; the data are Sn-O bonds and O-Sn-O angles, and the
# printed labels below now say so.
SiOBonds = ana.get_bonds('Sn', 'O')
SiOSiAngles = ana.get_angles('O', 'Sn', 'O')

print("there are {} Sn-O bonds in tin acetate".format(len(SiOBonds[0])))
print("there are {} O-Sn-O angles in tin acetate".format(len(SiOSiAngles[0])))

SiOBondsValues = ana.get_values(SiOBonds)
SiOSiAngleValues = ana.get_values(SiOSiAngles)

print("bond length data:")
print("the average Sn-O bond length is {}.".format(np.average(SiOBondsValues)))
print("the minimum Sn-O Distance is:", np.amin(SiOBondsValues))
print("the maximum Sn-O Distance is:", np.amax(SiOBondsValues))

print("bond angle data:")
print("the average O-Sn-O angle is {}.".format(np.average(SiOSiAngleValues)))
print("the maximum O-Sn-O angle is:", np.amax(SiOSiAngleValues))
print("the minimum O-Sn-O angle is:", np.amin(SiOSiAngleValues))
def test_analysis():
    """
    Test the geometry.analysis module.

    For ethanol and C60 the "all_*" bond/angle/dihedral lists must hold
    exactly twice as many entries as the "unique_*" lists; the same factor
    is checked for element-filtered queries on ethanol and C7NH5.
    """

    def _entries(per_atom):
        # Total number of entries across all atoms of one image
        return sum([len(item) for item in per_atom])

    def _assert_twice_as_many(everything, unique):
        for image in range(len(everything)):
            assert _entries(everything[image]) == _entries(unique[image]) * 2

    def _assert_all_kinds(analysis):
        _assert_twice_as_many(analysis.all_bonds, analysis.unique_bonds)
        _assert_twice_as_many(analysis.all_angles, analysis.unique_angles)
        _assert_twice_as_many(analysis.all_dihedrals,
                              analysis.unique_dihedrals)

    # Ethanol: 9 atoms
    ethanol = Analysis(molecule('CH3CH2OH'))
    assert np.shape(ethanol.adjacency_matrix[0].todense()) == (9, 9)
    _assert_all_kinds(ethanol)

    cch_all = ethanol.get_angles('C', 'C', 'H', unique=False)[0]
    cch_unique = ethanol.get_angles('C', 'C', 'H', unique=True)[0]
    assert len(cch_all) == len(cch_unique) * 2

    # C60 for the bulk checks, C7NH5 for the filtered queries
    csixty = molecule('C60')
    benzonitrile = molecule('C7NH5')
    ana = Analysis(csixty)
    ana2 = Analysis(benzonitrile)
    _assert_all_kinds(ana)

    cch_all = ana2.get_angles('C', 'C', 'H', unique=False)[0]
    cch_unique = ana2.get_angles('C', 'C', 'H', unique=True)[0]
    assert len(cch_all) == len(cch_unique) * 2

    hcch_all = ana2.get_dihedrals('H', 'C', 'C', 'H', unique=False)[0]
    hcch_unique = ana2.get_dihedrals('H', 'C', 'C', 'H', unique=True)[0]
    assert len(hcch_all) == len(hcch_unique) * 2
def search_abnormal_bonds(model, verbose=True):
    '''
    Check all bond lengths in the model for abnormally short ones.

    An A-B bond is flagged when it is shorter than
    max(0.4, 0.75 * (covalent radius of A + covalent radius of B)) Angstrom.

    Parameters:

    model: Atoms object or string. If string it will read a file
        in the same folder, e.g. "name.traj"
    verbose: bool. If True, print "OK" when nothing is flagged, otherwise
        the number of flagged bonds, their identities and the cutoff that
        applied to each flagged element pair.

    Returns:

    True if no abnormal bond was found, False otherwise.
    '''

    # Combination as AB = BA for bonds, avoiding redundancy
    from itertools import combinations_with_replacement
    # Imports necessary to work out accurate minimum bond distances
    from ase.data import chemical_symbols, covalent_radii

    # Read file or Atoms object
    if isinstance(model, str):
        model = read(model)

    analysis = Analysis(model)
    # set() to ensure unique chemical symbols; sorted so that the pair order
    # (and therefore the printed report) is the same on every run.
    list_of_symbols = sorted(set(model.get_chemical_symbols()))

    list_of_abnormal_bonds = []  # one "A-B" entry per flagged bond
    cutoffs = {}  # "A-B" -> cutoff in Angstrom, flagged pairs only

    # Iterate over all arrangements of chemical symbols
    for A, B in combinations_with_replacement(list_of_symbols, 2):
        AB_Bonds = analysis.get_bonds(A, B)
        # Make sure bond exist before retrieving values
        if AB_Bonds == [[]]:
            continue

        # Softcoded cutoff, computed once per element pair.
        sum_of_covalent_radii = (covalent_radii[chemical_symbols.index(A)]
                                 + covalent_radii[chemical_symbols.index(B)])
        cutoff = max(0.4, sum_of_covalent_radii * 0.75)
        print_AB = A + '-' + B

        # get_values returns one list of distances per image.
        for image_values in analysis.get_values(AB_Bonds):
            n_short = sum(1 for value in image_values if value < cutoff)
            if n_short:
                list_of_abnormal_bonds += [print_AB] * n_short
                cutoffs[print_AB] = cutoff

    # Abnormality check
    if list_of_abnormal_bonds:
        if verbose:
            print("A total of", len(list_of_abnormal_bonds),
                  "abnormal bond lengths observed.")
            print("Identities:", list_of_abnormal_bonds)
            # Each pair has its own cutoff, so report them per pair.
            print("Cutoffs / A:", cutoffs)
        return False

    if verbose:
        print("OK")
    return True
def analyzeFragments(datadir, **kwargs):
    """
    Tabulate small fragments and pairwise bond counts for the structures
    found in `datadir`.

    Two CSV files are written, both named by plain string concatenation with
    `datadir` (so `datadir` should end with a path separator):

    * fragdata.csv   - one row per structure, one column per fragment
                       formula, holding how many such fragments occur.
    * bondcounts.csv - one row per structure, one column per element pair,
                       holding the number of unique bonds of that pair.

    The per-formula fragment totals are also printed.

    Pass in `name` and `shallow` kwargs if needed for utils.readStruct function
    """
    from itertools import combinations

    # Connected components with fewer atoms than this count as fragments.
    maxFragmentSize = 10

    geometries = readStructs(datadir, **kwargs)['geom']
    analyses = {key: Analysis(item) for key, item in geometries.items()}
    analyses = pd.Series(analyses)

    #####################
    ### fragmentation ###
    #####################

    fragmentLists = []
    for struct in geometries:
        adjmat = Analysis(struct).adjacency_matrix[0]
        g = Graph(adjmat.shape[0])
        # Densify once instead of indexing the sparse matrix element by
        # element; argwhere yields the non-zero (i, j) pairs in row-major
        # order, i.e. the same order as a nested i/j loop.
        for i, j in np.argwhere(np.asarray(adjmat.todense())).tolist():
            g.addEdge(i, j)

        smallgraphs = []
        for subgraph in g.connectedComponents():
            if len(subgraph) < maxFragmentSize:
                members = set(subgraph)
                smallgraphs.append(
                    struct[[atom.index for atom in struct
                            if atom.index in members]])
        fragmentLists.append(smallgraphs)

    fragmentTypes = np.unique([
        fragment.symbols.get_chemical_formula()
        for fragmentList in fragmentLists
        for fragment in fragmentList
    ])

    fragdict = dict(zip(geometries.keys(), fragmentLists))
    # Build the zero-filled table with its final index up front, so it is
    # also valid when no fragment was found at all (no columns).
    fragmentData = pd.DataFrame(0, index=list(fragdict),
                                columns=list(fragmentTypes))
    for key, fragmentList in fragdict.items():
        for fragment in fragmentList:
            _symbol = fragment.symbols.get_chemical_formula()
            # Single .loc call: chained `frame[col].loc[row] += 1` may update
            # a temporary copy instead of the frame itself.
            fragmentData.loc[key, _symbol] += 1

    fragmentData.to_csv(datadir + "fragdata.csv")
    print(fragmentData.sum(axis=0))

    ###################
    ### bond counts ###
    ###################

    # NOTE: combinations() yields each pair of *different* elements once, so
    # same-element bonds (e.g. Si-Si) are not tabulated.
    elems = ["Si", "F", "N", "C", "H", "Ar"]
    result = {}
    for e1, e2 in combinations(elems, 2):
        result["{}-{}".format(e1, e2)] = pd.Series({
            key: len(analysis.get_bonds(e1, e2, unique=True)[0])
            for key, analysis in analyses.items()
        })
    pd.DataFrame(result).to_csv(datadir + "bondcounts.csv")
from ase.io.trajectory import Trajectory
import numpy as np
from ase.io import read
from ase.geometry.analysis import Analysis

# Reports Si-O bond lengths and Si-O-Si angles of the BEA structure.

# Convert the CIF structure to an ASE trajectory. The context manager closes
# the writer before the file is read back.
BEA = read('BEA.cif')
with Trajectory('BEA.traj', 'w') as traj:
    traj.write(BEA)

BEA = read('BEA.traj')
ana = Analysis(BEA)

# get_bonds / get_angles return one list per image; index 0 is the only image.
SiOBonds = ana.get_bonds('Si', 'O')
SiOSiAngles = ana.get_angles('Si', 'O', 'Si')

print("there are {} Si-O bonds in BETA".format(len(SiOBonds[0])))
print("there are {} Si-O-Si angles in BETA".format(len(SiOSiAngles[0])))

SiOBondsValues = ana.get_values(SiOBonds)
SiOSiAngleValues = ana.get_values(SiOSiAngles)

print("bond length data:")
print("the average Si-O bond length is {}.".format(np.average(SiOBondsValues)))
print("the minimum Si-O Distance is:", np.amin(SiOBondsValues))
print("the maximum Si-O Distance is:", np.amax(SiOBondsValues))

print("bond angle data:")
print("the average Si-O-Si angle is {}.".format(np.average(SiOSiAngleValues)))
print("the maximum Si-O-Si angle is:", np.amax(SiOSiAngleValues))
print("the minimum Si-O-Si angle is:", np.amin(SiOSiAngleValues))
Main Program Begins Here '''
# NOTE(review): the line above is the tail of a banner string whose opening
# quotes are outside this fragment; it is kept verbatim.

# --- TRAPPE ---
# Collect TRAPPE input params from the parameter file.
TRAPPE_fname = 'trappe_parameters_99.txt'
TRAPPE_atoms, TRAPPE_bonds, TRAPPE_name = get_TRAPPE_params(TRAPPE_fname)

# --- ASE ---
# Convert SMILES string into Atoms object.
SMILES = 'O=C(C=C)OCCCCCCCC'
atoms = pubchem_atoms_search(smiles=SMILES)

# Determine pseudo-atoms of molecule and remove hydrogens.
psuedo_atoms = get_psuedoatoms(atoms,TRAPPE_atoms,TRAPPE_bonds)
ana = Analysis(psuedo_atoms) # Create geometry analysis object from Atoms object.
organized_ASE_atoms = Order_atoms_wrt_TRAPPE(psuedo_atoms,ana,TRAPPE_bonds,TRAPPE_atoms)

# Center the pseudo-atoms. FEASST requires atom[0] to be located within origin!
# Find the first entry whose leading field equals 1 and remember its last
# field (presumably the position vector - TODO confirm against
# Order_atoms_wrt_TRAPPE). np.copy matters: the loop below modifies atm[-1]
# in place, so without a copy the reference would change part-way through.
# NOTE(review): if no entry has atm[0] == 1, center_about is never assigned
# and the second loop raises NameError.
for atm in organized_ASE_atoms:
    if atm[0] == 1:
        center_about = np.copy(atm[-1])
        break
# Shift every entry so that the reference entry ends up at the origin.
for atm in organized_ASE_atoms:
    atm[-1] -= center_about

''' File Output '''
def main(
        numsofar,  # use the run number you're seeding for
        batch,  # current batch number
        velo,  # velocity of incident Ar in Å/ps
        datadir="temp/",  # data files, structured as datadir/output$i-$j.gen and datadir/velos$i-$j
        outputdir="temp.new/",  # files for output
        hbondrange=6,
        zmincutoff=0.1,  # somewhat arbitrary value to get rid of atoms that have gone into bulk
        numperbatch=17,
        numbatches=10):
    """Trim detached fragments from each structure and add a new incident Ar.

    For every geometry file in `datadir` whose name contains "output":

    * the connected component containing atom 0 is treated as the slab and
      written to ``<outputdir>slab<key>.gen``;
    * every other component is removed when all of its atoms lie more than
      `hbondrange` above the highest slab atom, or all lie below
      `zmincutoff`; any atom above the top of the cell is removed as well;
    * an Ar atom is added at a random lateral position, and a velocity row
      ``(0, 0, -velo)`` is appended for it;
    * the result is written to ``<outputdir>input<key>.gen`` and
      ``<outputdir>velos<key>.in``.

    A per-structure tally of removed atoms by species is written to
    ``removedspecies.csv`` in the current working directory.

    All arguments may be given as strings (e.g. from the command line); they
    are converted to int/float first. File keys are the first run of digits
    in each file name. `datadir` and `outputdir` are joined to file names by
    plain string concatenation, so they should end with a path separator.
    """
    numsofar = int(numsofar)
    batch = int(batch)
    velo = float(velo)
    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)
    numperbatch = int(numperbatch)
    numbatches = int(numbatches)

    ###########################################
    ### Read in geometry files & velocities ###
    ###########################################
    geometries = {}
    velos = {}
    for fname in os.listdir(datadir):
        match = re.search(r"\d+", fname)
        if not match:
            continue
        key = match.group(0)
        if "output" in fname:
            geometries[key] = gen.read_gen(datadir + fname)
        if "velos" in fname:
            velos[key] = pd.read_csv(datadir + fname, header=None,
                                     dtype=float, sep=r"\s+")

    # to account for seed behavior from first numsofar sets of runs
    # numsofar can also be interpreted as = current run seeding for
    # The draws are discarded on purpose: they only advance the random
    # stream by one two-number draw per earlier run.
    np.random.seed(429)
    for _ in range(batch + numsofar * numbatches):
        for _ in range(numperbatch):
            np.random.random(size=2)

    ################
    ### trimming ###
    ################
    trimmedgeoms = {}
    trimmedvelos = {}
    removedspecies = {}

    for key, geom in geometries.items():
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F': 0, 'C': 0}

        # construct graph
        adjmat = Analysis(geom).adjacency_matrix[0]
        graph = Graph(adjmat.shape[0])
        # Densify once instead of indexing the sparse matrix element by
        # element; argwhere yields the non-zero (i, j) pairs in row-major
        # order, i.e. the same order as a nested i/j loop.
        for i, j in np.argwhere(np.asarray(adjmat.todense())).tolist():
            graph.addEdge(i, j)
        cc = graph.connectedComponents()

        # identify slab (the component holding atom 0), and max height of slab
        maingraph = set([comp for comp in cc if 0 in comp][0])
        slab = geom[[atom.index for atom in geom if atom.index in maingraph]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange

        # isolate fragments and identify which to remove
        # A set is used so that an atom matching more than one removal rule
        # is tallied only once.
        atomsToRemove = set()
        for frag in (comp for comp in cc if 0 not in comp):
            fragZs = np.array([geom[i].position[2] for i in frag])
            if np.all(fragZs > zcutoff) or np.all(fragZs < zmincutoff):
                atomsToRemove.update(frag)

        # account for any atoms that have wrapped around through the top of
        # the cell (lookin at you, H)
        atomsToRemove.update(
            a.index for a in geom if a.z > geom.cell[2, 2])

        # tally removed atoms by species; species outside the preset list
        # get a new entry instead of raising KeyError
        for idx in atomsToRemove:
            symbol = geom[idx].symbol
            removedatoms[symbol] = removedatoms.get(symbol, 0) + 1

        geomcopy = geom.copy()
        del geomcopy[[
            atom.index for atom in geomcopy if atom.index in atomsToRemove
        ]]

        # New incident Ar at a random fractional (x, y) position in the cell.
        x_rand, y_rand, z_rand = geomcopy.cell.cartesian_positions(
            np.append(np.random.random(size=2), 0))
        add_adsorbate(geomcopy, adsorbate='Ar', height=7,
                      position=(x_rand, y_rand))

        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy

        # Drop the velocity rows of the removed atoms, then add the Ar row.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        keptvelos = velos[key][[
            i not in atomsToRemove for i in range(len(velos[key]))
        ]]
        arvelo = pd.Series([0, 0, -velo]).to_frame().T
        trimmedvelos[key] = pd.concat([keptvelos, arvelo],
                                      ignore_index=True)

    # collect all removed species series into a df and write as csv
    # NOTE(review): written to the current working directory, not outputdir.
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")

    # write
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), geom)

    for key, v in trimmedvelos.items():
        v.to_csv("%svelos%s.in" % (outputdir, key),
                 sep=" ",
                 index=False,
                 header=False)