Example #1
0
def test_find_pattern_in_structure__cnnc_over_x_pbc_has_positions_across_x_pbc(linear_cnnc):
    """A CN pattern is found across the x periodic boundary.

    The stored coordinates of the matched atoms are expected to be the wrapped
    ones (14.5 and 0.5 along x), whereas the positions returned by the search
    are expected to be continuous across the boundary (14.5 and 15.5 along x).
    """
    # Translate by -0.5 along x and wrap the coordinates modulo 15.
    translated = linear_cnnc.positions + (-0.5, 0.0, 0.0)
    linear_cnnc.positions = translated % 15
    # Remove the last atom so that the final NC pair is not matched.
    linear_cnnc.pop(-1)

    search_pattern = Atoms(elements='CN', positions=[(0.0, 0., 0), (1.0, 0., 0.)])
    match_indices, match_positions = find_pattern_in_structure(
        linear_cnnc, search_pattern, return_positions=True)

    wrapped_expected = [(14.5, 0., 0.), (0.5, 0., 0.)]
    unwrapped_expected = np.array([(14.5, 0., 0.), (15.5, 0., 0.)])
    first_match = linear_cnnc[match_indices[0]]
    assert (first_match.positions == wrapped_expected).all()
    assert (match_positions[0] == unwrapped_expected).all()
Example #2
0
def test_replace_pattern_in_structure__replace_hydrogens_in_octane_with_fluorines_half_the_time(octane):
    """Replacing H with F at ``replace_fraction=0.5`` swaps exactly half of the hydrogens.

    The resulting structure is expected to contain 9 H, 9 F and 8 C atoms, and
    ``assert_structure_positions_are_unchanged`` is used to check the positions
    against the original octane.
    """
    search_pattern = Atoms(elements='H', positions=[(0, 0, 0)])
    replace_pattern = Atoms(elements='F', positions=[(0, 0, 0)])
    # The replace call is given the search pattern directly, so no separate
    # find_pattern_in_structure call is needed here.
    final_structure = replace_pattern_in_structure(octane, search_pattern, replace_pattern, replace_fraction=0.5)
    assert Counter(final_structure.elements) == {"H": 9, "F": 9, "C": 8}
    assert_structure_positions_are_unchanged(octane, final_structure)
Example #3
0
def test_find_pattern_in_structure__hkust1_unit_cell_has_48_Cu_metal_nodes(hkust1_cif):
    """Searching the HKUST-1 unit cell for a single Cu atom yields 48 matches."""
    copper = Atoms(elements='Cu', positions=[(0, 0, 0)])
    matches = find_pattern_in_structure(hkust1_cif, copper)

    assert len(matches) == 48
    # Every match must select exactly one Cu atom from the structure.
    for atom_indices in matches:
        matched_atoms = hkust1_cif[atom_indices]
        assert list(matched_atoms.elements) == ['Cu']
Example #4
0
def test_find_pattern_in_structure__hkust1_xyz_3x3x3_supercell_has_1296_Cu_metal_nodes(hkust1_3x3x3_xyz):
    """Searching the 3x3x3 HKUST-1 xyz fixture for a single Cu atom yields 1296 matches."""
    copper = Atoms(elements='Cu', positions=[(0, 0, 0)])
    matches = find_pattern_in_structure(hkust1_3x3x3_xyz, copper)

    assert len(matches) == 1296
    # Every match must select exactly one Cu atom from the structure.
    for atom_indices in matches:
        matched_atoms = hkust1_3x3x3_xyz[atom_indices]
        assert list(matched_atoms.elements) == ['Cu']
Example #5
0
def test_find_pattern_in_structure__hkust1_unit_cell_offset_has_32_benzene_rings(hkust1_cif, benzene):
    """After translating and re-wrapping HKUST-1, 32 benzene rings are still found.

    The positions returned by the search (rather than the stored, wrapped
    coordinates) are the ones passed to ``assert_benzene``.
    """
    # Translate by (-4, -4, -4), then wrap the coordinates by the diagonal of the cell.
    hkust1_cif.translate((-4, -4, -4))
    cell_diagonal = np.diag(hkust1_cif.cell)
    hkust1_cif.positions = hkust1_cif.positions % cell_diagonal

    match_indices, coords = find_pattern_in_structure(hkust1_cif, benzene, return_positions=True)

    expected_elements = ['C', 'C', 'C', 'C', 'C', 'C', 'H', 'H', 'H']
    for match_number, atom_indices in enumerate(match_indices):
        matched_atoms = hkust1_cif[atom_indices]
        assert list(matched_atoms.elements) == expected_elements
        assert_benzene(coords[match_number])
    assert len(match_indices) == 32
Example #6
0
def test_find_pattern_in_structure__hkust1_unit_cell_has_32_benzene_rings(hkust1_cif, benzene):
    """Searching the HKUST-1 unit cell for the benzene fixture yields 32 matches."""
    matches = find_pattern_in_structure(hkust1_cif, benzene)

    assert len(matches) == 32
    expected_elements = ['C', 'C', 'C', 'C', 'C', 'C', 'H', 'H', 'H']
    for atom_indices in matches:
        ring = hkust1_cif[atom_indices]
        # Elements must come back in pattern order, and the geometry must pass assert_benzene.
        assert list(ring.elements) == expected_elements
        assert_benzene(ring.positions)
Example #7
0
def test_find_pattern_in_structure__hkust1_cif_3x3x3_supercell_has_1296_Cu_metal_nodes(hkust1_cif):
    """A 3x3x3 replication of the HKUST-1 cif fixture contains 1296 single-Cu matches."""
    supercell = hkust1_cif.replicate(repldims=(3, 3, 3))
    copper = Atoms(elements='Cu', positions=[(0, 0, 0)])
    matches = find_pattern_in_structure(supercell, copper)

    assert len(matches) == 1296
    # Every match must select exactly one Cu atom from the replicated structure.
    for atom_indices in matches:
        matched_atoms = supercell[atom_indices]
        assert list(matched_atoms.elements) == ['Cu']
Example #8
0
def test_find_pattern_in_structure__hkust1_cif_2x2x2_supercell_has_256_benzene_rings(hkust1_cif, benzene):
    """A 2x2x2 replication of the HKUST-1 cif fixture contains 256 benzene matches."""
    supercell = hkust1_cif.replicate(repldims=(2, 2, 2))
    matches = find_pattern_in_structure(supercell, benzene)

    assert len(matches) == 256
    expected_elements = ['C', 'C', 'C', 'C', 'C', 'C', 'H', 'H', 'H']
    for atom_indices in matches:
        ring = supercell[atom_indices]
        # Elements must come back in pattern order, and the geometry must pass assert_benzene.
        assert list(ring.elements) == expected_elements
        assert_benzene(ring.positions)
Example #9
0
def test_find_pattern_in_structure__all_atoms_are_within_tolerance():
    """The tolerance ``abstol`` applies to each atom's position, however far away it is."""
    # Tolerances should be absolute: even when an atom is very far from the others, its own
    # location must be within the tolerance. The molecule here looks like
    #     HC.....................................................B
    # If the C-B distance were 100 angstrom, a 5% relative error in that distance would allow
    # the B atom to be misplaced by 5 angstrom. That is not what we want when replacing
    # portions of crystalline structures. If a use-case for relative tolerances appears, it
    # can easily be added back in.
    longstructure = Atoms(elements='HCB', positions=[(1, 0, 0), (2, 0, 0), (103, 0, 0)], cell=1000*np.identity(3))
    longpattern = Atoms(elements='HCB', positions=[(1, 0, 0), (2, 0, 0), (108., 0, 0)])
    assert len(find_pattern_in_structure(longstructure, longpattern, abstol=0.05)) == 0

    # Move the pattern's B atom progressively closer to the structure's B atom at x = 103;
    # only the last placement is expected to match.
    closer_placements = (
        ((104., 0, 0), 0),
        ((103.1, 0, 0), 0),
        ((103.04, 0, 0), 1),
    )
    for boron_position, expected_matches in closer_placements:
        longpattern.positions[2] = boron_position
        assert len(find_pattern_in_structure(longstructure, longpattern, abstol=0.05)) == expected_matches
Example #10
0
def test_find_pattern_in_structure__cnnc_over_xy_pbc_has_positions_across_xy_pbc(linear_cnnc):
    """A CN pattern is found across both the x and y periodic boundaries.

    The ``linear_cnnc`` fixture is requested but not used: the test builds its
    own CNNC chain along the xy diagonal with unit spacing between neighbours.
    """
    # Single name for sqrt(2)/2, used both to build the chain and in the expected values.
    sqrt2_2 = sqrt(2.0) / 2
    diagonal_cnnc = Atoms(
        elements='CNNC',
        positions=[(0., 0., 0), (sqrt2_2, sqrt2_2, 0.), (2.*sqrt2_2, 2.*sqrt2_2, 0.), (3.*sqrt2_2, 3.*sqrt2_2, 0.)],
        cell=15*np.identity(3))
    # Translate by -0.5 along x and y and wrap the coordinates modulo 15.
    diagonal_cnnc.positions = (diagonal_cnnc.positions + (-0.5, -0.5, 0.0)) % 15
    diagonal_cnnc.pop()  # don't match final NC

    search_pattern = Atoms(elements='CN', positions=[(0.0, 0., 0), (1.0, 0., 0.)])
    match_indices, match_positions = find_pattern_in_structure(diagonal_cnnc, search_pattern, return_positions=True)

    # Stored coordinates are the wrapped ones; compared approximately.
    wrapped_expected = np.array([(14.5, 14.5, 0.), (sqrt2_2 - 0.5, sqrt2_2 - 0.5, 0.)])
    assert np.isclose(diagonal_cnnc[match_indices[0]].positions, wrapped_expected).all()
    # Returned match positions are expected to continue past the cell boundary.
    unwrapped_expected = np.array([(14.5, 14.5, 0.), (14.5 + sqrt2_2, 14.5 + sqrt2_2, 0.)])
    assert (match_positions[0] == unwrapped_expected).all()
Example #11
0
def test_find_pattern_in_structure__octane_has_2_CH3(octane):
    """Searching octane for a CH3 pattern yields two matches with the expected C-H distances."""
    pattern = Atoms(elements='CHHH', positions=[(0, 0, 0), (-0.538, -0.635,  0.672), (-0.397,  0.993,  0.052), (-0.099, -0.371, -0.998)])
    match_indices = find_pattern_in_structure(octane, pattern)
    assert len(match_indices) == 2
    for indices in match_indices:
        pattern_found = octane[indices]
        # list(...) keeps the comparison valid whatever sequence type `elements` is,
        # matching how the other tests in this file compare elements.
        assert list(pattern_found.elements) == ["C", "H", "H", "H"]
        cpos = pattern_found.positions[0]
        # Squared C-H distance for each of the three hydrogens, within the approx tolerance.
        for hydrogen_idx in (1, 2, 3):
            squared_distance = ((pattern_found.positions[hydrogen_idx] - cpos) ** 2).sum()
            assert squared_distance == approx(1.18704299, 5e-2)
Example #12
0
def test_find_pattern_in_structure__octane_has_12_CH2(octane):
    """Searching octane for a CH2 pattern yields twelve matches with the expected C-H distances."""
    # There are technically 12 matches, since each CH3 makes 3 variations of CH2.
    pattern = Atoms(elements='CHH', positions=[(0, 0, 0), (-0.1, -0.379, -1.017), (-0.547, -0.647,  0.685)])
    match_indices = find_pattern_in_structure(octane, pattern)

    assert len(match_indices) == 12
    for indices in match_indices:
        pattern_found = octane[indices]
        # list(...) keeps the comparison valid whatever sequence type `elements` is,
        # matching how the other tests in this file compare elements.
        assert list(pattern_found.elements) == ["C", "H", "H"]
        cpos = pattern_found.positions[0]
        # Squared C-H distance for both hydrogens, within the approx tolerance.
        for hydrogen_idx in (1, 2):
            squared_distance = ((pattern_found.positions[hydrogen_idx] - cpos) ** 2).sum()
            assert squared_distance == approx(1.18704299, 5e-2)
Example #13
0
def test_find_pattern_in_structure__octane_over_pbc_has_2_CH3(octane):
    """Two CH3 groups are still found after octane is shifted and wrapped into a 15 Å cell."""
    # CH3 CH2 CH2 CH2 CH2 CH2 CH2 CH3 #
    # move atoms across corner boundary
    octane.positions += -1.8
    # move coordinates into main 15 Å unit cell
    octane.positions %= 15
    octane.cell = (15 * np.identity(3))

    pattern = Atoms(elements='CHHH', positions=[(0, 0, 0), (-0.538, -0.635,  0.672), (-0.397,  0.993,  0.052), (-0.099, -0.371, -0.998)])
    match_indices = find_pattern_in_structure(octane, pattern)
    assert len(match_indices) == 2
    for indices in match_indices:
        # list(...) keeps the comparison valid whatever sequence type `elements` is,
        # matching how the other tests in this file compare elements.
        assert list(octane[indices].elements) == ["C", "H", "H", "H"]
Example #14
0
def test_find_pattern_in_structure__match_indices_returned_in_order_of_pattern():
    """Each match lists its indices in pattern order (H first, then O).

    For the H-O-H chain this means the second match is (2, 1), not (1, 2).
    """
    water_like = Atoms(
        elements='HOH',
        positions=[(4., 0, 0), (5., 0., 0), (6., 0., 0.)],
        cell=15*np.identity(3))
    hydroxyl = Atoms(elements='HO', positions=[(-1., 0, 0), (0., 0., 0.)])

    found = find_pattern_in_structure(water_like, hydroxyl)

    expected = {(0, 1), (2, 1)}
    assert set(found) == expected
Example #15
0
def mofun_cli(inputpath,
              outputpath,
              find_path=None,
              replace_path=None,
              replace_fraction=1.0,
              axis1a_idx=0,
              axis1b_idx=-1,
              axis2_idx=None,
              dumppath=None,
              chargefile=None,
              replicate=None,
              mic=None,
              framework_element=None,
              pp=False):
    """Load a structure, apply the requested operations in order, and write it out.

    Each step below runs only when its option is supplied:

    1. ``dumppath``: replace the positions with those read from a LAMMPS dump file.
    2. ``chargefile``: replace the charges with one float per non-blank line of
       the given iterable of lines.
    3. ``replicate``: replicate the structure by the given dimensions.
    4. ``mic``: replicate so that each diagonal cell entry is at least ``2 * mic``.
    5. ``pp``: call ``assign_pair_params_to_structure`` on the structure.
    6. ``find_path`` / ``replace_path``: with both, replace the search pattern by
       the replace pattern (honoring ``replace_fraction`` and the axis indices);
       with only ``find_path``, print the matches; with only ``replace_path``,
       print a message and continue.
    7. ``framework_element``: overwrite the symbol of atoms whose atom group is 0.

    ``outputpath`` must expose a ``suffix`` attribute. Suffixes ``.lmpdat`` and
    ``.mol`` are written with ``atoms.save``; anything else is converted with
    ``atoms.to_ase()`` and written by ASE with periodic boundaries enabled.

    NOTE(review): the length checks use ``assert`` and are therefore skipped
    when Python runs with ``-O``.
    """
    atoms = Atoms.load(inputpath)

    # Take updated positions from a LAMMPS dump file; atom counts must agree.
    if dumppath is not None:
        dump_frame = ase.io.read(dumppath, format="lammps-dump-text")
        assert len(dump_frame.positions) == len(atoms.positions)
        atoms.positions = dump_frame.positions

    # Take updated charges: one value per non-blank line.
    if chargefile is not None:
        stripped_lines = (line.strip() for line in chargefile)
        charges = np.array([float(text) for text in stripped_lines if text != ''])
        assert len(charges) == len(atoms.positions)
        atoms.charges = charges

    if replicate is not None:
        atoms = atoms.replicate(replicate)

    # Replicate to meet the minimum image convention, if requested.
    if mic is not None:
        cell_diagonal = np.diag(atoms.cell)
        mic_repls = np.array(np.ceil(2 * mic / cell_diagonal), dtype=int)
        atoms = atoms.replicate(mic_repls)

    if pp:
        assign_pair_params_to_structure(atoms)

    if find_path is None:
        if replace_path is not None:
            print("Cannot perform a replace operation without a find operation")
    else:
        search_pattern = Atoms.load(find_path)
        if replace_path is None:
            # Find-only mode: report the matches without modifying the structure.
            results = find_pattern_in_structure(atoms, search_pattern)
            print("Found %d instances of the search_pattern in the structure" %
                  len(results))
            print(results)
        else:
            replace_pattern = Atoms.load(replace_path)
            atoms = replace_pattern_in_structure(
                atoms,
                search_pattern,
                replace_pattern,
                axis1a_idx=axis1a_idx,
                axis1b_idx=axis1b_idx,
                axis2_idx=axis2_idx,
                replace_fraction=replace_fraction)

    # Set framework atoms (atom group 0) to the specified element. The original
    # comment says this only works on ASE exports.
    # NOTE(review): confirm that `atoms` exposes a writable `symbols` attribute here.
    if framework_element is not None:
        atoms.symbols[atoms.atom_groups == 0] = framework_element

    if outputpath.suffix in ['.lmpdat', '.mol']:
        atoms.save(outputpath)
        return

    print("INFO: Trying output using ASE")
    ase_structure = atoms.to_ase()
    if framework_element is not None:
        ase_structure.symbols[atoms.atom_groups == 0] = framework_element

    ase_structure.set_pbc(True)
    ase_structure.write(outputpath)
Example #16
0
uio67 = Atoms.load("uio67.cif")

uio66_linker = Atoms.load("uio66-linker.cml")
uio67_linker = Atoms.load("uio67-linker.cml")

# One CSV row per (MOF, replication) pair is written to stdout.
output_csv = csv.writer(sys.stdout)
output_csv.writerow([
    "mof", "repl", "num-atoms", "matches-found", "matches-expected",
    "process-time-seconds", "perf-counter-seconds"
])

# NOTE(review): `uio66` and `all_repldims` are not defined in this section;
# presumably they are set up earlier in the file.
benchmark_cases = (
    ("uio66", uio66, uio66_linker),
    ("uio67", uio67, uio67_linker),
)

for mof_name, mof, linker in benchmark_cases:
    for repldims in all_repldims:
        structure = mof.replicate(repldims=repldims)
        # Start both clocks immediately before the search so replication is not timed.
        time_s = time.process_time()
        perf_s = time.perf_counter()
        patterns = find_pattern_in_structure(structure, linker)
        # The expected count is 24 matches per replicated cell.
        # NOTE(review): the label format ends with a trailing "x" (e.g. "1x1x1x");
        # kept as-is in case downstream consumers rely on it.
        output_csv.writerow(
            (mof_name, "%dx%dx%dx" % repldims, len(structure),
             len(patterns), 24 * repldims[0] * repldims[1] * repldims[2],
             time.process_time() - time_s, time.perf_counter() - perf_s))
Example #17
0
def test_find_pattern_in_structure__octane_has_8_carbons(octane):
    """Searching octane for a single C atom yields eight matches."""
    pattern = Atoms(elements='C', positions=[(0, 0, 0)])
    match_indices = find_pattern_in_structure(octane, pattern)
    assert len(match_indices) == 8
    for indices in match_indices:
        # list(...) keeps the comparison valid whatever sequence type `elements` is,
        # matching how the other tests in this file compare elements.
        assert list(octane[indices].elements) == ["C"]
Example #18
0
"""
Run and visualize with:

python -m cProfile -o hkustperf.pstats  -s tottime hkustperf.py
snakeviz hkustperf.pstats

"""

import ase
import ase.io

import numpy as np

from mofun import find_pattern_in_structure, replace_pattern_in_structure, Atoms

# Profiling workload: load HKUST-1 and benzene, replicate HKUST-1 3x3x3,
# then run the pattern search that the profiler is meant to measure.
hkust1_cif = Atoms.load_cif("../tests/hkust-1/hkust-1-with-bonds.cif")
benzene_ase = ase.io.read("../tests/molecules/benzene.xyz")
benzene = Atoms.from_ase_atoms(benzene_ase)
hkust1_repped = hkust1_cif.replicate(repldims=(3, 3, 3))
match_indices = find_pattern_in_structure(hkust1_repped, benzene)