def test_find_pattern_in_structure__cnnc_over_x_pbc_has_positions_across_x_pbc(linear_cnnc):
    """A CN pair split across the x boundary is found, and its returned positions are contiguous.

    The matched atoms keep their wrapped coordinates in the structure (14.5 and 0.5), while the
    positions returned by the search continue past the boundary (14.5 and 15.5).
    """
    # shift the chain by -0.5 in x and wrap modulo 15 so the first atom lands at x = 14.5
    x_shift = (-0.5, 0.0, 0.0)
    linear_cnnc.positions = (linear_cnnc.positions + x_shift) % 15
    # drop the last atom so that the final NC pair cannot match
    linear_cnnc.pop(-1)

    search_pattern = Atoms(elements='CN', positions=[(0.0, 0., 0), (1.0, 0., 0.)])
    match_indices, match_positions = find_pattern_in_structure(
        linear_cnnc, search_pattern, return_positions=True)

    wrapped_positions = linear_cnnc[match_indices[0]].positions
    assert (wrapped_positions == [(14.5, 0., 0.), (0.5, 0., 0.)]).all()

    contiguous_positions = np.array([(14.5, 0., 0.), (15.5, 0., 0.)])
    assert (match_positions[0] == contiguous_positions).all()
def test_replace_pattern_in_structure__replace_hydrogens_in_octane_with_fluorines_half_the_time(octane):
    """Replacing H with F at replace_fraction=0.5 leaves 9 H, 9 F and 8 C with positions unchanged."""
    search_pattern = Atoms(elements='H', positions=[(0, 0, 0)])
    replace_pattern = Atoms(elements='F', positions=[(0, 0, 0)])
    # replace_pattern_in_structure receives the search pattern itself, so no separate
    # find_pattern_in_structure call is needed here (its result was never used).
    final_structure = replace_pattern_in_structure(
        octane, search_pattern, replace_pattern, replace_fraction=0.5)

    assert Counter(final_structure.elements) == {"H": 9, "F": 9, "C": 8}
    assert_structure_positions_are_unchanged(octane, final_structure)
def test_find_pattern_in_structure__hkust1_unit_cell_has_48_Cu_metal_nodes(hkust1_cif):
    """Searching the HKUST-1 unit cell for a single Cu atom yields 48 matches, each a lone Cu."""
    cu_pattern = Atoms(elements='Cu', positions=[(0, 0, 0)])
    matches = find_pattern_in_structure(hkust1_cif, cu_pattern)

    assert len(matches) == 48
    for match in matches:
        matched_atoms = hkust1_cif[match]
        assert list(matched_atoms.elements) == ['Cu']
def test_find_pattern_in_structure__hkust1_xyz_3x3x3_supercell_has_1296_Cu_metal_nodes(hkust1_3x3x3_xyz):
    """Searching the 3x3x3 HKUST-1 xyz fixture for a single Cu atom yields 1296 matches."""
    cu_pattern = Atoms(elements='Cu', positions=[(0, 0, 0)])
    matches = find_pattern_in_structure(hkust1_3x3x3_xyz, cu_pattern)

    assert len(matches) == 1296
    for match in matches:
        matched_atoms = hkust1_3x3x3_xyz[match]
        assert list(matched_atoms.elements) == ['Cu']
def test_find_pattern_in_structure__hkust1_unit_cell_offset_has_32_benzene_rings(hkust1_cif, benzene):
    """After translating and re-wrapping HKUST-1, all 32 benzene rings are still found.

    The positions returned by the search (not the wrapped structure positions) are the ones
    checked against the benzene geometry.
    """
    # translate by (-4, -4, -4), then wrap coordinates back using the diagonal of the cell matrix
    hkust1_cif.translate((-4, -4, -4))
    cell_diagonal = np.diag(hkust1_cif.cell)
    hkust1_cif.positions = hkust1_cif.positions % cell_diagonal

    match_indices, coords = find_pattern_in_structure(hkust1_cif, benzene, return_positions=True)

    expected_elements = ['C'] * 6 + ['H'] * 3
    for match_number, indices in enumerate(match_indices):
        assert list(hkust1_cif[indices].elements) == expected_elements
        assert_benzene(coords[match_number])

    assert len(match_indices) == 32
def test_find_pattern_in_structure__hkust1_unit_cell_has_32_benzene_rings(hkust1_cif, benzene):
    """The HKUST-1 unit cell contains 32 matches of the benzene pattern (6 C followed by 3 H)."""
    matches = find_pattern_in_structure(hkust1_cif, benzene)
    assert len(matches) == 32

    expected_elements = ['C'] * 6 + ['H'] * 3
    for match in matches:
        ring = hkust1_cif[match]
        assert list(ring.elements) == expected_elements
        assert_benzene(ring.positions)
def test_find_pattern_in_structure__hkust1_cif_3x3x3_supercell_has_1296_Cu_metal_nodes(hkust1_cif):
    """Replicating the HKUST-1 cif 3x3x3 and searching for a single Cu atom yields 1296 matches."""
    supercell = hkust1_cif.replicate(repldims=(3, 3, 3))
    cu_pattern = Atoms(elements='Cu', positions=[(0, 0, 0)])

    matches = find_pattern_in_structure(supercell, cu_pattern)

    assert len(matches) == 1296
    for match in matches:
        matched_atoms = supercell[match]
        assert list(matched_atoms.elements) == ['Cu']
def test_find_pattern_in_structure__hkust1_cif_2x2x2_supercell_has_256_benzene_rings(hkust1_cif, benzene):
    """Replicating the HKUST-1 cif 2x2x2 yields 256 matches of the benzene pattern."""
    supercell = hkust1_cif.replicate(repldims=(2, 2, 2))

    matches = find_pattern_in_structure(supercell, benzene)
    assert len(matches) == 256

    expected_elements = ['C'] * 6 + ['H'] * 3
    for match in matches:
        ring = supercell[match]
        assert list(ring.elements) == expected_elements
        assert_benzene(ring.positions)
def test_find_pattern_in_structure__all_atoms_are_within_tolerance():
    """With abstol=0.05 the far-away B atom only matches when it is within 0.05 of its target."""
    # Tolerances should be absolute: even if an atom is very far away from another atom, the
    # location of that atom should be within the tolerance. Here the molecule looks like this:
    #
    #     HC.....................................................B
    #
    # If the distance between C and B was 100 angstrom, a 5% error in the distance could lead
    # to a 5 angstrom difference in the position of the B atom. This is not what we want when
    # replacing portions of crystalline structures. If there is a use-case for relative
    # tolerances, it can easily be added back in.
    longstructure = Atoms(elements='HCB', positions=[(1, 0, 0), (2, 0, 0), (103, 0, 0)],
                          cell=1000*np.identity(3))
    longpattern = Atoms(elements='HCB', positions=[(1, 0, 0), (2, 0, 0), (108., 0, 0)])

    # pattern B at x=108 vs. structure B at x=103: no match
    assert len(find_pattern_in_structure(longstructure, longpattern, abstol=0.05)) == 0

    # move the pattern's B atom progressively closer; only the last position is within abstol
    closer_cases = [
        ((104., 0, 0), 0),
        ((103.1, 0, 0), 0),
        ((103.04, 0, 0), 1),
    ]
    for boron_position, expected_matches in closer_cases:
        longpattern.positions[2] = boron_position
        assert len(find_pattern_in_structure(longstructure, longpattern, abstol=0.05)) == expected_matches
def test_find_pattern_in_structure__cnnc_over_xy_pbc_has_positions_across_xy_pbc(linear_cnnc):
    """A CN pair split across both the x and y boundaries is found with contiguous positions.

    The matched atoms keep their wrapped coordinates in the structure, while the positions
    returned by the search continue past the boundary.
    """
    v2_2 = sqrt(2.0) / 2
    # The fixture argument is deliberately replaced by a CNNC chain laid along the xy diagonal
    # with unit spacing between neighbours.
    linear_cnnc = Atoms(elements='CNNC',
                        positions=[(0., 0., 0), (v2_2, v2_2, 0.), (2.*v2_2, 2.*v2_2, 0.), (3.*v2_2, 3.*v2_2, 0.)],
                        cell=15*np.identity(3))
    # shift by -0.5 in x and y and wrap modulo 15 so the first atom lands at (14.5, 14.5, 0)
    linear_cnnc.positions = (linear_cnnc.positions + (-0.5, -0.5, 0.0)) % 15
    # drop the last atom so that the final NC pair cannot match
    linear_cnnc.pop()

    search_pattern = Atoms(elements='CN', positions=[(0.0, 0., 0), (1.0, 0., 0.)])
    match_indices, match_positions = find_pattern_in_structure(linear_cnnc, search_pattern, return_positions=True)

    # Use the local v2_2 throughout: the original assertions referred to `sqrt2_2`, which is
    # not defined in this function.
    assert np.isclose(linear_cnnc[match_indices[0]].positions,
                      np.array([(14.5, 14.5, 0.), (v2_2 - 0.5, v2_2 - 0.5, 0.)])).all()
    assert (match_positions[0] == np.array([(14.5, 14.5, 0.), (14.5 + v2_2, 14.5 + v2_2, 0.)])).all()
def test_find_pattern_in_structure__octane_has_2_CH3(octane):
    """Octane contains exactly two CH3 matches, each with three C-H squared distances near 1.187."""
    ch3_pattern = Atoms(elements='CHHH', positions=[(0, 0, 0), (-0.538, -0.635, 0.672),
                                                    (-0.397, 0.993, 0.052), (-0.099, -0.371, -0.998)])
    matches = find_pattern_in_structure(octane, ch3_pattern)
    assert len(matches) == 2

    for match in matches:
        methyl = octane[match]
        assert methyl.elements == ["C", "H", "H", "H"]

        carbon_position = methyl.positions[0]
        # squared C-H distance for each of the three hydrogens, in pattern order
        for hydrogen_index in (1, 2, 3):
            offset = methyl.positions[hydrogen_index] - carbon_position
            assert (offset ** 2).sum() == approx(1.18704299, 5e-2)
def test_find_pattern_in_structure__octane_has_12_CH2(octane):
    """Octane contains 12 CH2 matches, each with two C-H squared distances near 1.187."""
    # there are technically 12 matches, since each CH3 makes 3 variations of CH2
    ch2_pattern = Atoms(elements='CHH', positions=[(0, 0, 0), (-0.1, -0.379, -1.017), (-0.547, -0.647, 0.685)])
    matches = find_pattern_in_structure(octane, ch2_pattern)
    assert len(matches) == 12

    for match in matches:
        methylene = octane[match]
        assert methylene.elements == ["C", "H", "H"]

        carbon_position = methylene.positions[0]
        # squared C-H distance for each of the two hydrogens, in pattern order
        for hydrogen_index in (1, 2):
            offset = methylene.positions[hydrogen_index] - carbon_position
            assert (offset ** 2).sum() == approx(1.18704299, 5e-2)
def test_find_pattern_in_structure__octane_over_pbc_has_2_CH3(octane):
    """Both CH3 groups are still found after octane is shifted and wrapped into a 15 Å cubic cell."""
    # chain layout: CH3 CH2 CH2 CH2 CH2 CH2 CH2 CH3

    # move atoms across the corner boundary
    octane.positions += -1.8
    # move coordinates into the main 15 Å unit cell
    octane.positions %= 15
    octane.cell = (15 * np.identity(3))

    ch3_pattern = Atoms(elements='CHHH', positions=[(0, 0, 0), (-0.538, -0.635, 0.672),
                                                    (-0.397, 0.993, 0.052), (-0.099, -0.371, -0.998)])
    matches = find_pattern_in_structure(octane, ch3_pattern)

    assert len(matches) == 2
    for match in matches:
        assert octane[match].elements == ["C", "H", "H", "H"]
def test_find_pattern_in_structure__match_indices_returned_in_order_of_pattern():
    """Each match lists structure indices in the pattern's atom order (H first, then O)."""
    structure = Atoms(
        elements='HOH',
        positions=[(4., 0, 0), (5., 0., 0), (6., 0., 0.),],
        cell=15*np.identity(3),
    )
    search_pattern = Atoms(elements='HO', positions=[(-1., 0, 0), (0., 0., 0.)])

    found = find_pattern_in_structure(structure, search_pattern)

    # both hydrogens (indices 0 and 2) pair with the oxygen at index 1, hydrogen listed first
    expected = {(0, 1), (2, 1)}
    assert set(found) == expected
def mofun_cli(inputpath, outputpath, find_path=None, replace_path=None, replace_fraction=1.0, axis1a_idx=0,
        axis1b_idx=-1, axis2_idx=None, dumppath=None, chargefile=None, replicate=None, mic=None,
        framework_element=None, pp=False):
    """Load a structure, optionally pre-process it, run a find or find/replace, and write the result.

    Args:
        inputpath: path passed to Atoms.load for the starting structure.
        outputpath: destination; must expose `.suffix`. Suffixes '.lmpdat' and '.mol' are written
            with atoms.save, anything else is converted with atoms.to_ase() and written by ASE.
        find_path: optional path of the search pattern (loaded with Atoms.load).
        replace_path: optional path of the replacement pattern; requires find_path.
        replace_fraction, axis1a_idx, axis1b_idx, axis2_idx: forwarded unchanged to
            replace_pattern_in_structure.
        dumppath: optional "lammps-dump-text" file whose positions overwrite the loaded positions.
        chargefile: optional iterable of text lines, one charge per non-blank line.
        replicate: optional argument forwarded to atoms.replicate.
        mic: optional length; the structure is replicated ceil(2 * mic / cell diagonal) times
            per dimension.
        framework_element: optional element assigned to every atom whose atom group is 0.
        pp: when true, assign_pair_params_to_structure is applied to the structure.
    """
    atoms = Atoms.load(inputpath)

    # overwrite positions in the loaded structure with those from the lammps dump file
    if dumppath is not None:
        dumpatoms = ase.io.read(dumppath, format="lammps-dump-text")
        assert len(dumpatoms.positions) == len(atoms.positions)
        atoms.positions = dumpatoms.positions

    # overwrite charges, skipping lines that are blank after stripping
    if chargefile is not None:
        stripped_lines = (line.strip() for line in chargefile)
        charges = np.array([float(text) for text in stripped_lines if text != ''])
        assert len(charges) == len(atoms.positions)
        atoms.charges = charges

    if replicate is not None:
        atoms = atoms.replicate(replicate)

    # replicate to meet the minimum image convention, if necessary
    if mic is not None:
        repls = np.array(np.ceil(2 * mic / np.diag(atoms.cell)), dtype=int)
        atoms = atoms.replicate(repls)

    if pp:
        assign_pair_params_to_structure(atoms)

    if find_path is None:
        if replace_path is not None:
            print("Cannot perform a replace operation without a find operation")
    else:
        search_pattern = Atoms.load(find_path)
        if replace_path is None:
            # find only: report the matches, the structure itself is left untouched
            results = find_pattern_in_structure(atoms, search_pattern)
            print("Found %d instances of the search_pattern in the structure" % len(results))
            print(results)
        else:
            replace_pattern = Atoms.load(replace_path)
            atoms = replace_pattern_in_structure(
                atoms, search_pattern, replace_pattern,
                axis1a_idx=axis1a_idx, axis1b_idx=axis1b_idx, axis2_idx=axis2_idx,
                replace_fraction=replace_fraction)

    # set framework elements (atom group 0) to the specified element
    if framework_element is not None:
        atoms.symbols[atoms.atom_groups == 0] = framework_element

    if outputpath.suffix in ['.lmpdat', '.mol']:
        atoms.save(outputpath)
        return

    print("INFO: Trying output using ASE")
    aseatoms = atoms.to_ase()
    if framework_element is not None:
        aseatoms.symbols[atoms.atom_groups == 0] = framework_element
    aseatoms.set_pbc(True)
    aseatoms.write(outputpath)
uio67 = Atoms.load("uio67.cif")
uio66_linker = Atoms.load("uio66-linker.cml")
uio67_linker = Atoms.load("uio67-linker.cml")

# one CSV row per (mof, replication) pair is written to stdout
output_csv = csv.writer(sys.stdout)
output_csv.writerow([
    "mof", "repl", "num-atoms", "matches-found", "matches-expected",
    "process-time-seconds", "perf-counter-seconds"
])

# `uio66` and `all_repldims` are not defined in this part of the script; they are expected to
# exist by the time this point is reached.
benchmark_cases = (
    ("uio66", uio66, uio66_linker),
    ("uio67", uio67, uio67_linker),
)
for mof_name, unit_cell, linker in benchmark_cases:
    for repldims in all_repldims:
        structure = unit_cell.replicate(repldims=repldims)

        # only the pattern search is timed, not the replication
        time_s = time.process_time()
        perf_s = time.perf_counter()
        patterns = find_pattern_in_structure(structure, linker)

        # NOTE(review): the "repl" label format ends in a trailing "x" (e.g. "2x2x2x");
        # confirm whether downstream consumers rely on that before changing it.
        # matches-expected is 24 times the number of replicated cells.
        output_csv.writerow(
            (mof_name, "%dx%dx%dx" % repldims, len(structure), len(patterns),
             24 * repldims[0] * repldims[1] * repldims[2],
             time.process_time() - time_s, time.perf_counter() - perf_s))
def test_find_pattern_in_structure__octane_has_8_carbons(octane):
    """Searching octane for a single C atom yields 8 matches, each a lone carbon."""
    carbon_pattern = Atoms(elements='C', positions=[(0, 0, 0)])
    matches = find_pattern_in_structure(octane, carbon_pattern)

    assert len(matches) == 8
    for match in matches:
        assert octane[match].elements == ["C"]
"""
Profile a benzene search in a 3x3x3 HKUST-1 supercell.

Run and visualize with:

python -m cProfile -o hkustperf.pstats -s tottime hkustperf.py
snakeviz hkustperf.pstats
"""

import ase
import ase.io
import numpy as np

from mofun import find_pattern_in_structure, replace_pattern_in_structure, Atoms

# inputs are read from the test data directory, relative to the working directory
hkust1_cif = Atoms.load_cif("../tests/hkust-1/hkust-1-with-bonds.cif")
benzene_ase = ase.io.read("../tests/molecules/benzene.xyz")
benzene = Atoms.from_ase_atoms(benzene_ase)

# the search over the replicated structure is the workload being profiled
hkust1_repped = hkust1_cif.replicate(repldims=(3, 3, 3))
match_indices = find_pattern_in_structure(hkust1_repped, benzene)