Example #1
    def __init__(self, atoms, scale=SCALE):
        """Prepare the state needed to determine bonds in an atoms object.

        Args:
            atoms: atoms object, or a path that utils.make_atoms_obj can turn
                   into one

        Keyword Arguments:
            scale: factor multiplied into each atom's covalent radius to get
                   its neighbor cutoff
                   (default: SCALE)
        """
        # accept either a path or an existing atoms object
        self.atoms = utils.make_atoms_obj(atoms)
        self.scale = scale

        # per-atom cutoff: covalent radius scaled by the requested factor
        scaled_radii = covalent_radii[self.atoms.numbers] * scale
        self.radii = scaled_radii

        # neighborlist with no skin and no self pairs
        self.neighborlist = ase.neighborlist.NeighborList(
            cutoffs=scaled_radii, skin=0, self_interaction=False)

        # results start empty:
        # - bond_arr: i-j atom pairs involved in a bond, shape = (n_bonds, 2)
        # - cns: array of coordination numbers (CNs)
        # - _halfbond_arr: half bond array, built only when its getter is used
        self.bond_arr = None
        self.cns = None
        self._halfbond_arr = None

        # 1st neighbors of each atom, keyed by atom index
        self.coord_dict = defaultdict(set)

        # calculate bonding, coordination dict, and cns
        # NOTE(review): no statement follows this comment in the visible code
Example #2
def get_ligand_map(atom: Union[ase.Atoms, str],
                   bonds: Bonds = None,
                   scale: float = SCALE) -> dict:
    """map sulfur indices to ligand atoms

    Args:
        atom: metal NC atoms object (or a path that utils.make_atoms_obj
              can turn into one)

    Keyword Arguments:
        bonds: bond object containing all bond details
                       (default: None: Bonds obj will be built)
        scale: scale covalent radii of each atom - for determining
                       bonds when no Bonds object is passed in
                       (default: SCALE)

    Returns:
        {<sulfur index>: list of R group indices that are a part of ligand}

    Raises:
        utils.LPNCException: if a ligand is still growing after 1000
                             expansion passes
    """
    # create atoms object from path or do nothing if already atoms object
    atom = utils.make_atoms_obj(atom)

    # create Bonds object if not given
    if bonds is None:
        bonds = Bonds(atom, scale=scale)

    # get R group atom indices (every atom whose symbol is not in MS)
    r_group = set(np.where(~np.isin(atom.symbols, list(MS)))[0])

    ligand_map = {}
    for s in np.where(atom.symbols == 'S')[0]:
        # seed the ligand with the R group atoms bonded directly to the sulfur
        lig = set(bonds.coord_dict[s]) & r_group

        # repeatedly add R group neighbors until the set stops growing
        for _ in range(1000):
            last = lig.copy()
            for i in last:
                lig |= (set(bonds.coord_dict[i]) & r_group)
            if lig == last:
                ligand_map[s] = sorted(lig)
                break
        else:
            # only reached when the loop ran out without converging
            raise utils.LPNCException('Unable to resolve ligand')

    return ligand_map
Example #3
def get_atom_ids(atoms,
                 cs_details=None,
                 motifs=None,
                 scale=SCALE) -> List[AtomID]:
    """encode structural type of each atom in LPNC
    - (C[core] | S[shell], el [chemical symbol], ...
    - For core:
        - (C, el, int[coordination number], int[core layer])
        - core layer = 0[center], 1[layer 1], ..., n[surface]
    - For shell:
        - (S, el, int[motif type], E[end] | M[middle])
        - end = sulfur atom that terminates a motif (at least one bond to core)

    Args:
        atoms: metal NC atoms object (or a path that utils.make_atoms_obj
               can turn into one)

    Keyword Arguments:
        cs_details: core shell info dict; the 'core', 'shell' and 'sulfido'
                    entries are read
                    (default: None: built with get_core_shell)
        motifs: dict of {motif type: 2D array of atom indices}
                (default: None: built with count_motifs on the shell atoms)
        scale: scale covalent radii of each atom when building bonds
               (default: SCALE)

    Returns:
        list of AtomIDs (CoreID | ShellID) ordered by atoms object indices

    Raises:
        utils.LPNCException: if the core cannot be split into layers or if
                             some atom did not receive an id
    """
    # create atoms object from path or do nothing if already atoms object
    atoms = utils.make_atoms_obj(atoms)

    # initialize id dict {atom index: CoreID | ShellID}
    ids = {}

    # NOTE(review): the original comments here described tagging atoms with
    # their indices, but no tagging statement is visible and nothing below
    # reads tags, so the caller's atoms object is left untouched.

    # get Bonds object
    bonds = Bonds(atoms, scale=scale)

    # get core shell details dict
    if cs_details is None:
        cs_details = get_core_shell(atoms, bonds=bonds)

    # peel the core one layer at a time, from the outside in
    # keys of layers count down: 0 = first layer removed, -1 = next, ...
    core_atoms = set(cs_details['core'])
    layers = defaultdict(list)
    layer = 0
    while core_atoms:
        # an atom is in the current layer if at least one of its neighbors
        # is not among the remaining core atoms
        outer = {c for c in core_atoms
                 if not all(i in core_atoms for i in bonds.coord_dict[c])}

        # nothing could be peeled off: stop instead of looping forever
        if not outer:
            raise utils.LPNCException("Unable to identify core atom layers")

        layers[layer].extend(outer)

        # remove all atoms found in current layer and shift one layer down
        layer -= 1
        core_atoms -= outer

    # add back to layer count such that
    # 0: central core atom(s)
    # 1: layer 1
    # <toadd>: surface of core
    # (no core atoms -> no layers -> nothing to shift)
    toadd = abs(min(layers)) if layers else 0
    for key, members in layers.items():
        for c in members:
            ids[c] = CoreID(c, atoms.symbols[c], bonds.cns[c], key + toadd)

    # create array of shell atom indices
    shell = np.array(cs_details['shell'])

    # get motifs dict
    if motifs is None:
        # positions of the sulfido atoms within the shell array
        map_s0 = []
        if cs_details['sulfido']:
            map_s0 = np.where(np.vstack(cs_details['sulfido']) == shell)[1]

        # only pass in shell atoms to avoid running get_core_shell again
        shell_motifs = count_motifs(atoms[shell], scale=scale, sulfido=map_s0)

        # map shell_motif indices back to original atoms object indices
        motifs = {k: shell[v] for k, v in shell_motifs.items()}

    ligand_map = get_ligand_map(atoms, bonds)

    for mtype in sorted(motifs):
        # handle bridge, sulfido, and rings
        if mtype < 1:
            # X-meric rings are all middle atoms
            # bridge and sulfidos considered 'end' motifs
            position = 'middle' if mtype < -1 else 'end'
            for ms in motifs[mtype].flatten():
                ids[ms] = ShellID(ms, atoms.symbols[ms], mtype, position)

            # define R groups (skipped for sulfidos [-1])
            if mtype != -1:
                # only need to look at sulfurs (even columns)
                for s in motifs[mtype][:, ::2].flatten():
                    for r in ligand_map[s]:
                        ids[r] = ShellID(r, atoms.symbols[r], mtype, 'middle')

        # handle all other "typical" motifs
        else:
            # create list for end and middle position labels
            positions = ['middle'] * motifs[mtype].shape[1]
            positions[0] = 'end'
            positions[-1] = 'end'

            # motif value is 2D array of ms indices; cycling the labels
            # restarts them at the beginning of every row
            for ms, position in zip(motifs[mtype].flatten(), cycle(positions)):
                # make sure id is not already defined
                # (avoid overwriting sulfido atoms)
                if ms not in ids:
                    ids[ms] = ShellID(ms, atoms.symbols[ms], mtype, position)

                # if ms is S, add in its ligand atoms
                if atoms.symbols[ms] == 'S':
                    for r in ligand_map[ms]:
                        # the id carries r's own index, matching how the
                        # bridge/ring branch above builds ligand ids
                        # (previously the sulfur index ms was passed here)
                        ids[r] = ShellID(r, atoms.symbols[r], mtype, position)

    id_list = [ids[i] for i in sorted(ids)]

    if len(id_list) != len(atoms):
        raise utils.LPNCException("unable to map to all atoms in LPNC")

    return id_list
Example #4
def count_motifs(atom, scale=SCALE, show=False, sulfido=None):
    """algorithmically determine motif types and counts of LPNC

    Args:
        atom (ase.Atoms): metal NC atoms object (full NC or just the shell)

    Keyword Arguments:
        scale (float): scales covalent radii when calculating neighborlist
                         (default: SCALE)
        show (bool): if True, motif info is printed
                       (default: False)
        sulfido (list): list of sulfido atom indices found from
                          get_core_shell function; replaced by the
                          get_core_shell result when the full NC is passed in
                          (default: None: no sulfido atoms)

    Returns:
        motif info (dict): {-1: [sulfido indices],
                             0: [bridging S indices],
                             1: [monomer (S-M-S) indices],
                             2: [dimer (S-M-S-M-S) indices],
                             ...
                            -n: [n-meric ring indices]}

    Raises:
        ValueError: if the M and S atoms are not all classified into motifs
                    within 1000 iterations
    """
    # create atoms object from path or do nothing if already atoms object
    atom = utils.make_atoms_obj(atom)

    # avoid a shared mutable default argument
    if sulfido is None:
        sulfido = []

    fc_atom = atom.copy()

    # dictionary of motifs
    # key: motif type (1 - monomer, 3 - trimer, etc.)
    # negative values: -1 = sulfido, -n = "n-meric ring"
    # values: lists of Au and S indices for motif
    all_motifs = {}

    # determine if atoms object is full NC or just shell
    # if # metal >= # S, it must be the full NC
    ns = sum(a.symbol == 'S' for a in atom)
    nm = sum(a.symbol in METALS for a in atom)
    full_cluster = nm >= ns

    # separate into shell if full cluster
    if full_cluster:
        cs_res = get_core_shell(atom, scale=scale, show=False)
        shell_i = cs_res['shell']
        sulfido = cs_res['sulfido']
    else:
        shell_i = np.arange(len(atom))

    # create list to map ms_indices back to orig atom_indices
    # finds metal and S atoms that are in shell
    shell_set = set(shell_i)
    mapping_i = np.array(
        [i.index for i in fc_atom
         if i.symbol in MS and i.index in shell_set],
        dtype=int)

    # make atoms obj of just S and metals (no R)
    ms = ase.Atoms(fc_atom[mapping_i])

    # get mapped sulfido atoms (if none, set to empty list)
    ms_sulfido = []
    sulfido_counts = {}
    if len(sulfido):
        ms_sulfido = np.where(np.vstack(sulfido) == mapping_i)[1]

        # track use of sulfidos in motifs (3 uses per sulfido atom)
        sulfido_counts = {s: 0 for s in ms_sulfido}

        # add sulfido atoms to motifs (if any)
        all_motifs[-1] = np.vstack(sulfido)

    # create Bonds object
    bonds = Bonds(ms, scale=scale)

    # True where the ms atom is a sulfur (computed once for the link test)
    is_sulfur = ms.symbols == 'S'

    # S-M-S-M-...-S motif building
    # ms_i: non-sulfido atoms not yet assigned to a motif
    ms_i = set(range(len(ms))) - set(ms_sulfido)
    motif = []
    used = set()
    # ends_found[0]: front of motif is closed, ends_found[1]: back is closed
    ends_found = [0, 0]

    # set max iterations to avoid endless loop
    max_iter = 1000
    for _ in range(max_iter):
        if not motif:
            # if no M S atoms left, terminate loop (all motifs found)
            if not len(ms_i):
                break

            # use sulfidos first (each sulfido should be involved in
            # three different motifs)
            for seed in sulfido_counts:
                if sulfido_counts[seed] < 3:
                    # if starting with a sulfido, we've already
                    # found the starting end
                    ends_found[0] = 1
                    break

            # if no sulfidos or no sulfido uses left, start from a random atom
            else:
                seed = ms_i.pop()
                # NOTE(review): marking the seed as used is reconstructed;
                # without it a later motif could pick this atom up again
                used.add(seed)

            # initialize new motif with the seed atom
            motif = [seed]

        # try to grow the motif by one atom at the back (1) and front (0)
        for i, last in zip([motif[-1], motif[0]], [1, 0]):
            if ends_found[last]:
                continue

            bonded_to = bonds.coord_dict.get(i, [])
            for b in bonded_to:
                # only look at new atoms
                if b in used or b in motif:
                    continue

                # find next link in motif
                # - motif must have a sulfur every other atom!
                if is_sulfur[b] != is_sulfur[i]:
                    # last == 1 appends, last == 0 prepends
                    motif.insert(len(motif) * last, b)

                    # add b to used and remove from ms_i
                    # iff it is not a sulfido atom
                    if b not in sulfido_counts:
                        used.add(b)
                        ms_i.discard(b)
                    else:
                        # a sulfido closes this end of the motif
                        ends_found[last] = 1
                    break
            else:
                # no new link was found: this end of the motif is done
                ends_found[last] = 1

        # once both motif ends found, add it to all_motifs
        if all(ends_found):
            assert len(set(motif)) == len(motif)

            # increment sulfido usage
            for m in motif:
                if m in sulfido_counts:
                    sulfido_counts[m] += 1

            # use number of atoms in motif to determine integer name (mtype)
            # S-M-S-M-S: 5 atoms // 2 = 2: dimer
            mtype = len(motif) // 2

            # if len(motif) is even, negate mtype to indicate ring
            # -S-M-S-M-S-M-S-M-: (8 atom ring // 2)* - 1 = -4: tetrameric ring
            if len(motif) % 2 == 0:
                mtype *= -1

            # get the correct indices that map back to atoms obj passed in
            atom_indices = mapping_i[motif].tolist()

            if mtype not in all_motifs:
                all_motifs[mtype] = [atom_indices]
            else:
                all_motifs[mtype].append(atom_indices)

            # reset motif list
            motif = []
            ends_found = [0, 0]

    # raise ValueError if unable to classify all M S atoms into motifs
    # within <max_iter>
    else:
        raise ValueError(f"Motif algorithm exceeded {max_iter:,} iterations.")

    # convert motifs to arrays
    for m in all_motifs:
        all_motifs[m] = np.array(all_motifs[m])

    # if show, print motif types and counts
    # NOTE(review): the original print block is not visible; this is a
    # minimal stand-in listing each motif type with its count
    if show:
        for m in sorted(all_motifs):
            print(f'{m}: {len(all_motifs[m])}')

    return all_motifs
Example #5
def get_core_shell(atom, bonds=None, scale=SCALE, show=False):
    """separates LPNC into core and shell based on "divide and protect" theory

    Args:
        atom (ase.Atoms): metal NC atoms object

    Keyword Arguments:
        bonds (Bonds): bond object containing all bond details
                       (default: None: Bonds obj will be built)
        scale (float): scale covalent radii of each atom - for determining
                       bonds when no Bonds object is passed in
                       (default: SCALE)
        show (bool): prints details of core and shell if True
                     (default: False)

    Returns:
        core shell info (dict): {core: core atom indices,
                                 shell: shell atom indices,
                                 sulfido: sulfido atom indices,
                                 bridge: bridging S indices,
                                 nshellint: num shell interactions,
                                 corecnavg: avg CN of core atoms
                                            (includes bonds to shell),
                                 justcorecnavg: avg CN of core excluding
                                                bonds to shell}
    """
    # create atoms object from path or do nothing if already atoms object
    atom = utils.make_atoms_obj(atom)

    # determine if NC has R group (check for C's and H's)
    hasr = (atom.numbers == 6).any() or (atom.numbers == 1).any()

    # create Bonds object if not given
    if bonds is None:
        bonds = Bonds(atom, scale=scale)

    # first, find sulfido atoms (if any)
    sulfido = []
    # iterate over sulfur atoms
    for s in np.where(atom.symbols == 'S')[0]:
        # get indices of neighbors
        bonded_to = list(bonds.coord_dict[s])

        # count number of metal neighbors
        mets = sum(a.symbol in METALS for a in atom[bonded_to])

        # S is a sulfido if all neighbors are metals and > 2 neighbors
        if mets == len(bonded_to) > 2:
            sulfido.append(s)

    # initialize list of core atom indices
    core = []

    for a in atom:
        if a.symbol in METALS:
            # count S neighbors of the metal
            neighs = list(bonds.coord_dict[a.index])
            s_neighs = sum(atom[neighs].symbols == 'S')

            # less than two S neighbors = core atom
            if s_neighs < 2:
                # add index to list of core atoms
                core.append(a.index)

    # get shell atoms (everything that is not core), in index order
    # dtype=int keeps an empty shell usable as an index array
    shell = np.array(sorted(set(range(len(atom))) - set(core)), dtype=int)

    # get core CN avg
    corecnavg = 0

    # get core CN avg excluding core-shell bonds
    justcorecnavg = 0

    # get number of shell-core interactions
    nshellint = 0

    # only make these calcs if there is a core
    if len(core):
        # calc avg CN of core atoms
        corecnavg = bonds.cns[core].mean()

        # calc core CN avg excluding core-shell bonds
        core_set = set(core)
        justcore_cns = [len(set(bonds.coord_dict[c]) & core_set)
                        for c in core]
        justcorecnavg = np.mean(justcore_cns)

        # calculate min shell-to-core distance for M shell atoms
        metal_shell = [sh for sh in shell if atom[sh].symbol in METALS]

        # np.vstack cannot stack an empty list, so skip when no shell metals
        if metal_shell:
            all_dists = atom.get_all_distances()
            dists = all_dists[np.vstack(metal_shell), core].min(1)

            # add M atoms to nshellint if their distance is < 5 to core
            # (presumably angstroms, the ASE length unit - TODO confirm)
            nshellint = sum(dists < 5)

    # find bridging motifs
    # if no R group, bridging S will have no bonds
    # else they'll have 1 bond
    match = int(hasr)

    # create matrix only containing shell bonds
    shell_bonds = bonds.bond_arr[np.isin(bonds.bond_arr, shell).all(1)]

    # find bridging S's by matching <match> CN
    s_shell = shell[atom[shell].numbers == 16]

    # minlength guarantees a count (possibly 0) for every atom index;
    # without it a bond-free S with a high index raises IndexError
    shell_cns = np.bincount(shell_bonds.flatten(), minlength=len(atom))
    bridges = s_shell[shell_cns[s_shell] == match]

    # add in bridging S's to nshellint
    nshellint += len(bridges)

    # create info dict
    info = {
        'core': core,
        'shell': shell.tolist(),
        'sulfido': sulfido,
        'bridge': bridges.tolist(),
        'nshellint': nshellint,
        'corecnavg': corecnavg,
        'justcorecnavg': justcorecnavg
    }

    # print summary of info (if show)
    if show:
        print(atom.get_chemical_formula('metal').center(CEN, '-'))

        print('----- Sep. Info -----'.center(CEN))
        # create list of sep details
        sep_dets = [
            'N-core'.rjust(VCEN) + f': {len(core)}',
            'N-shellint'.rjust(VCEN) + f': {nshellint}',
            'Core CN Avg'.rjust(VCEN) + f': {corecnavg:.3f}',
            'Just Core CN Avg'.rjust(VCEN) + f': {justcorecnavg:.3f}\n'
        ]

        # NOTE(review): the two print calls below are reconstructed from the
        # adjacent comments; the original statements are not visible
        # if no core, only print shell interactions
        if not len(core):
            print('(NO CORE FOUND)'.center(CEN))
            print(sep_dets[1])

        # else print all core details
        else:
            print('\n'.join(sep_dets))

    return info
Example #6
    def __init__(self, atoms, scale=SCALE):
        """Analyze an LPNC and collect its structural descriptors.

        Args:
            atoms: metal NC atoms object (or a path that utils.make_atoms_obj
                   can turn into one); a copy is stored, the input is not
                   modified

        Keyword Arguments:
            scale: scale covalent radii of each atom when building bonds
                   (default: SCALE)
        """
        # create atoms object from path or do nothing if already atoms object
        # then work on a private copy
        self.atoms = utils.make_atoms_obj(atoms)
        self.atoms = self.atoms.copy()

        # set tag indices to atoms object
        # that way, any new atoms objects will have a mapping index
        # back to original atoms object
        self.atoms.set_tags(range(len(self.atoms)))

        self.scale = scale
        self.bonds = Bonds(self.atoms, self.scale)

        self.info = get_core_shell(self.atoms, bonds=self.bonds)

        # create core and shell atoms objects
        self.core = self.atoms[self.info['core']]
        self.shell = self.atoms[self.info['shell']]

        # get number of metal atoms, number of sulfur atoms
        self.n_m = np.isin(self.atoms.symbols, list(METALS)).sum()
        self.n_s = sum(self.atoms.symbols == 'S')

        # number of core atoms
        self.n_core = len(self.core)

        # get average core CN
        self.core_cn_avg = self.info['corecnavg']
        self.just_core_cn_avg = self.info['justcorecnavg']

        # create ase.Atoms objects of each ligand (sulfur + its R group atoms)
        self.ligand_map = get_ligand_map(self.atoms, self.bonds)
        self.ligands = [
            self.atoms[[k] + v] for k, v in self.ligand_map.items()
        ]

        # get number of sulfido atoms
        self.n_sulfido = len(self.info['sulfido'])

        # calculate number of ligands
        self.n_ligand = self.n_s - self.n_sulfido

        # get number of C and H per ligand
        # (0 when there are no ligands, instead of dividing by zero)
        if self.n_ligand:
            self.n_c_per_lig = sum(self.atoms.symbols == 'C') / self.n_ligand
            self.n_h_per_lig = sum(self.atoms.symbols == 'H') / self.n_ligand
        else:
            self.n_c_per_lig = 0.0
            self.n_h_per_lig = 0.0

        # get motif details
        # computed here because the fingerprint below reads motif counts
        self.motifs = count_motifs(self.atoms, scale=self.scale)

        # get atom structural ids
        self.ids = get_atom_ids(self.atoms, self.info, self.motifs, self.scale)

        # feature vector (fingerprint, fp)
        # n metals, n sulfurs, n core atoms, average CN of core atoms
        self.fp = [self.n_m, self.n_s, self.n_core, self.core_cn_avg]

        # add number of C and H per ligand to fingerprint
        self.fp += [self.n_c_per_lig, self.n_h_per_lig]

        # add motif counts to fingerprint, one entry per motif type -2..3:
        # n dimeric ring, n sulfido, n bridge, n monomer, n dimer, n trimer
        self.fp += [len(self.motifs.get(i, [])) for i in range(-2, 4)]

        # convert fingerprint to array
        self.fp = np.array(self.fp)