예제 #1
0
    def testNumberOfHitsWithStartAtom(self):
        """
        Check the number of motif instances returned by
        find_motif.start_motif when a start atom is supplied.
        """
        # smiles -> [multiplicity, start atom, motif which will be
        #            searched for, expected number of hits of the motif]
        cases = {
            'CCCO[O]': [2, 0, ['C', 'C', 'C', 'O', 'O'], 1],
            'C=C': [1, 0, ['X', 'X', 'X'], 2],
            'S=S': [1, 0, ['S', 'S'], 1],
        }

        for smi, (mult, start, motif, exp) in cases.items():
            st_pt = StationaryPoint(smi, 0, mult, smiles=smi)
            st_pt.characterize()
            bond, natom, atom = st_pt.bond, st_pt.natom, st_pt.atom
            # every atom forms its own group: no equivalencies are used
            eqv = [[k] for k in range(natom)]
            cal = len(
                find_motif.start_motif(motif, natom, bond, atom, start, eqv))
            warn = ('Unexpected number of motif hits for '
                    '{}, expected {}, calculated {}').format(smi, exp, cal)
            self.assertEqual(exp, cal, warn)
예제 #2
0
 def rigid_along_path(self, atomi, atomj):
     """
     Method finds the shortest path between two atoms and checks if any atom along that
     pathway is rigid. An atom is rigid if it is in a cycle or is doubly bonded to another atom
     which has more than one neighbor.

     If the two atoms are directly bonded, the result is 1 when the bond
     order is larger than one or when atomi is flagged in self.cycle,
     and 0 otherwise.

     Otherwise paths of increasing length are searched; only the first
     path found that connects the two atoms is examined.

     Returns 1 if the connection is rigid, 0 if not (also when no
     connecting path is found).
     """
     direct = self.bond[atomi][atomj]
     if direct > 0:
         # multiple bond, or the atoms are part of a cycle
         return 1 if (direct > 1 or self.cycle[atomi] == 1) else 0

     # The upper limit is natom + 1 so that a path running through every
     # atom of the molecule (chain length == natom) is tried as well.
     for chain_length in range(3, self.natom + 1):
         motif = ['X'] * chain_length
         instances = find_motif.start_motif(motif, self.natom, self.bond, self.atom, -1, [[k] for k in range(self.natom)])
         if len(instances) == 0:
             # no chain of this length, so the search is stopped
             break
         for ins in instances:
             ends = (ins[0], ins[-1])
             if ends != (atomi, atomj) and ends != (atomj, atomi):
                 continue
             # inspect the atoms strictly between the two end points
             for at in ins[1:-1]:
                 if self.cycle[at] == 1:
                     return 1
                 for neigh, order in enumerate(self.bond[at]):
                     # doubly bonded neighbor whose bond orders sum to more
                     # than 2, i.e. it has at least one other neighbor
                     if order == 2 and sum(self.bond[neigh]) > 2:
                         return 1
             return 0
     return 0
예제 #3
0
 def testBondFilter(self):
     """
     Count the motif hits for which find_motif.bondfilter returns 0
     for a given bond pattern and compare with the expected number.
     """
     smi = 'CCC=CCC'
     motif = ['C', 'C', 'C', 'C']
     bondpattern = [2, 'X', 'X']
     exp = 2

     st_pt = StationaryPoint(smi, 0, 1, smiles=smi)
     st_pt.characterize()
     bond, natom, atom = st_pt.bond, st_pt.natom, st_pt.atom
     # every atom forms its own group: no equivalencies are used
     eqv = [[k] for k in range(natom)]
     hits = find_motif.start_motif(motif, natom, bond, atom, -1, eqv)

     # a hit is counted when bondfilter returns 0 for it
     count = sum(
         1 for hit in hits
         if find_motif.bondfilter(hit, bond, bondpattern) == 0)

     warn = ('Unexpected number of motif hits for '
             '{}, expected {}, calculated {}').format(smi, exp, count)
     self.assertEqual(exp, count, warn)
예제 #4
0
    def calc_chiral(self):
        """
        Calculate self.chiral. 0 if non-chiral, +1 or -1 if chiral. Each atom gets a label like this.

        Two situations are examined for every atom i:

        1. i has exactly four neighbors and their atomid values are all
           different: the label of i is set.
        2. i has at least one double bond: chains of 2, 4, 6 or 8
           consecutive double bonds starting at i are searched (the
           >C=C=C< case). If the atoms bonded to either chain end
           (chain atoms excluded) carry exactly four distinct atomid
           values, the label of the middle atom of the chain is set.

        The label itself is the value returned by self.calc_chiral_hand.

        Always returns 0; the result is stored in self.chiral.
        """

        self.chiral = np.zeros(self.natom)

        # take min of resonance structure bonds
        # as those portions are planar and do not contribute to chirality
        # for the >C=C=C< case
        # The minimum is accumulated over ALL resonance structures, not
        # only over the last two of them.
        reduced_bond = self.bonds[0]
        for resonance_bond in self.bonds[1:]:
            reduced_bond = np.minimum(reduced_bond, resonance_bond)

        for i in range(self.natom):
            if np.count_nonzero(
                    reduced_bond[i] > 0) == 4:  # exactly 4 neighbors
                atids = []
                positions = np.empty((0, 3))
                for j in range(self.natom):
                    if reduced_bond[i][j] > 0:
                        atids.append(self.atomid[j])
                        positions = np.append(positions, [self.geom[j]],
                                              axis=0)
                if len(set(atids)) == 4:  # all are different
                    self.chiral[i] = self.calc_chiral_hand(
                        self.geom[i], positions, atids)

            if np.count_nonzero(
                    reduced_bond[i] == 2) > 0:  # has at least one double bond
                for dlen in range(
                        2, 9,
                        2):  # up to 8, even number of double bonds in a row
                    # dlen bonds connect dlen + 1 atoms
                    motif = ['X' for _ in range(dlen + 1)]
                    instances = find_motif.start_motif(motif, self.natom,
                                                       reduced_bond, self.atom,
                                                       i, self.atom_eqv)
                    bondpattern = [2 for _ in range(dlen)]
                    for instance in instances:
                        atids = []
                        # bondfilter returning 0 is treated as a match of
                        # the all-double-bond pattern
                        if find_motif.bondfilter(instance, reduced_bond,
                                                 bondpattern) == 0:
                            positions = np.empty((0, 3))
                            for j in range(self.natom):
                                if (reduced_bond[instance[0]][j] > 0 or reduced_bond[instance[-1]][j] > 0) and \
                                   (j not in instance):  # bonded to first or last atom in instance
                                    atids.append(self.atomid[j])
                                    positions = np.append(positions,
                                                          [self.geom[j]],
                                                          axis=0)
                            if len(set(atids)) == 4:
                                # middle atom of the chain gets the label
                                center = instance[int(dlen / 2)]
                                self.chiral[center] = self.calc_chiral_hand(
                                    self.geom[center], positions, atids)

        return 0
예제 #5
0
def get_chain(a1, a2, mol):
    """
    Get the shortest chain between two atoms

    Chains starting at a1 are searched with increasing length, and the
    first one that ends at a2 is returned as given by
    find_motif.start_motif. An empty list is returned if no chain is
    found.
    """
    # The upper limit is natom + 1 so that a chain containing every atom
    # of the molecule (length == natom) can be found as well.
    for length in range(1, mol.natom + 1):
        motif = ['X'] * length
        instances = find_motif.start_motif(motif, mol.natom, mol.bond,
                                           mol.atom, a1,
                                           [[k] for k in range(mol.natom)])
        for ins in instances:
            if ins[-1] == a2:
                return ins
    return []
예제 #6
0
    def find_cycle(self):
        """
        Find all the cycles in a molecule, if any.

        Motifs ['X', 'X', ..., 'X'] of length 3 up to natom are searched,
        and a motif instance is a cycle when its first and last atom are
        bonded. The search is halted before reaching natom as soon as a
        motif length does not give any hit.

        TODO: leave all the leaves of the graph out of the search, i.e.
        the atoms that only have one neighbor, as they never participate
        in a cycle.

        Results are stored on the object:
        - self.cycle_chain: list of lists, each holding the atom indices
          of one cycle. An instance is only added if no stored cycle has
          the same set of atoms. For fused cycles all the possible cycles
          are kept (e.g. two fused rings lead to three cycles).
        - self.cycle: one entry per atom, 1 if the atom is in a cycle,
          0 otherwise.

        Always returns 0.
        """
        self.cycle_chain = []
        self.cycle = [0] * self.natom

        for cycle_size in range(3, self.natom + 1):
            instances = find_motif.start_motif(
                ['X'] * cycle_size, self.natom, self.bond, self.atom, -1,
                [[k] for k in range(self.natom)])
            if len(instances) == 0:
                break
            for ins in instances:
                if not self.bond[ins[0]][ins[-1]]:
                    # chain ends are not bonded: not a cycle
                    continue
                members = sorted(ins)
                if any(sorted(cyc) == members for cyc in self.cycle_chain):
                    # same atoms as a cycle that is already stored
                    continue
                self.cycle_chain.append(ins)
                for at in ins:
                    self.cycle[at] = 1
        return 0
예제 #7
0
def divide_atoms(ati, atj, bond, natom, atom):
    """
    Divide the atoms of a molecule in two sets which are separated
    by the bond between ati and atj.

    In the case of rings, the atoms are equally divided in the two sets,
    which will change the bond length of the bond furthest away from
    the given bond.
    Be careful when using this method for cyclic structures!

    Returns a tuple (status, division, division2):
    - if ati and atj are not bonded: (0, [ati], [])
    - otherwise: (1, division, division2), where division is the list
      handed to get_neighbors (presumably filled in place with the atoms
      on the side of ati; get_neighbors is defined elsewhere) and
      division2 holds all the remaining atom indices.
    """
    if bond[ati, atj] == 0:
        return 0, [ati], []

    # bookkeeping lists used to collect the atoms on the side of ati
    division = [ati]
    forbidden = [atj]
    visited = [ati]

    # look for cycles closed by the ati-atj bond and cut them in half
    for ring_size in range(3, natom + 1):
        inst = find_motif.start_motif(['X'] * ring_size, natom, bond, atom,
                                      -1, [])
        if len(inst) == 0:
            break
        for ins in inst:
            first, last = ins[0], ins[-1]
            if not (bond[first][last] > 0):
                # chain ends are not bonded: not a cycle
                continue
            if first == ati and last == atj:
                forbidden.append(ins[ring_size // 2])
            if first == atj and last == ati:
                forbidden.append(ins[-ring_size // 2 - 1])

    get_neighbors(ati, visited, forbidden, division, bond, natom)
    division2 = [x for x in range(natom) if x not in division]

    return 1, division, division2
예제 #8
0
    def find_cycle(self):
        """
        Find all the cycles in a molecule, if any
        This is done by searching from motifs ['X','X', ..., 'X']
        with length 3 to natom, and the cycles are defined by
        the motif instances of which the first and last atom are bonded

        The search is halted before reaching natoms if a certain motif length
        does not give any hit

        TODO: leave all the leaves of the graph out for the search, i.e.
        the atoms that only have one neighbor, as they never participate
        in a cycle

        The cycles are kept in the cycle_chain list, which is a list of lists
        These lists contain the atom indices participating in each cycle.

        When more than one cycle is found, the cycles are ordered from
        largest to smallest, and a cycle is dropped if every one of its
        atoms also belongs to a cycle that comes later in that order
        (e.g. the outer ring around two fused rings is dropped).

        self.cycle holds one entry per atom: 1 if the atom was found in a
        cycle, 0 otherwise.

        Always returns 0.
        """

        self.cycle_chain = []  # list of the cycles
        self.cycle = [0 for i in range(self.natom)
                      ]  # 0 if atom is not in cycle, 1 otherwise

        for cycle_size in range(3, self.natom + 1):
            motif = ['X' for i in range(cycle_size)]
            instances = find_motif.start_motif(motif, self.natom, self.bond,
                                               self.atom, -1,
                                               [[k]
                                                for k in range(self.natom)])
            if len(instances) == 0:
                break
            for ins in instances:
                if self.bond[ins[0]][ins[-1]]:
                    # cycle found, check if it is new
                    new = 1
                    for cyc in self.cycle_chain:
                        if sorted(cyc) == sorted(ins):
                            new = 0
                    if new:
                        self.cycle_chain.append(ins)
                        for at in ins:
                            self.cycle[at] = 1

        if len(self.cycle_chain) > 1:
            # Largest rings first. sorted() is stable, so rings of equal
            # size keep their order of discovery and each ring appears
            # exactly once.
            rings = sorted(self.cycle_chain, key=len, reverse=True)
            kept = []
            for idx, ring in enumerate(rings):
                # atoms of all the rings that come after this one
                later_atoms = set()
                for other in rings[idx + 1:]:
                    later_atoms.update(other)
                # A new list is built instead of popping from the list
                # being iterated, so that no ring is skipped.
                if not all(at in later_atoms for at in ring):
                    kept.append(ring)
            self.cycle_chain = kept
        return 0