예제 #1
0
class DihedralSelector:
    """Select dihedrals (i, j, k, l) of a molecule from its bond graph.

    The selector lists the dihedrals reported by the bond graph, optionally
    drops those containing a (nearly) straight angle, and then applies the
    named filters requested by the caller.
    """

    def __init__(self, molecule, skip_straight=True):
        """Build the bond graph of `molecule` and cache the angles to avoid.

        Args:
            molecule: object exposing `bonds`, `elem`, `na`, `build_topology`,
                `find_angles` and `measure_angles` (the attributes used by
                this class).
            skip_straight: when true, angles found by `get_straight_angles`
                are stored and dihedrals containing them are skipped later.
        """
        self.m = molecule
        self.bond_graph = BondGraph(self.m.bonds)
        # set of (a, b, c) index tuples, stored in both directions
        self.avoid_angles_set = set()
        if skip_straight:
            self.avoid_angles_set = self.get_straight_angles()

    def get_straight_angles(self, threshold=165.0):
        """Return the set of angles whose measured value is >= `threshold`.

        Each qualifying angle is stored as a tuple in both orientations,
        (a, b, c) and (c, b, a), so membership tests need not care about
        direction. The topology of the molecule is rebuilt first with
        `force_bonds=False`.

        Args:
            threshold: lower bound for an angle to count as straight
                (presumably in degrees -- confirm against `measure_angles`).
        """
        straight_angles = set()
        self.m.build_topology(force_bonds=False)
        for a in self.m.find_angles():
            # only the first value returned by measure_angles is inspected
            if self.m.measure_angles(*a)[0] >= threshold:
                # tuple() keeps the entry hashable even if the angle is a list
                angle = tuple(a)
                straight_angles.add(angle)
                straight_angles.add(angle[::-1])
        return straight_angles

    def find_dihedrals(self, dihedral_filters=None):
        """Find all dihedrals, then apply filters.

        Args:
            dihedral_filters: iterable of filter names applied in order.
                Recognized names are 'equiv_terminal', 'heavy_atoms',
                'no_ring' and 'unique_center_bond'. None means no filter.

        Returns:
            The list of remaining dihedrals.

        Raises:
            ValueError: if any filter name is not recognized. Names are
                checked before any work is done.
        """
        # materialize once so a one-shot iterable survives the validation pass
        dihedral_filters = [] if dihedral_filters is None else list(dihedral_filters)
        available_filters = {
            'equiv_terminal': self.filter_equivalent_terminals,
            'heavy_atoms': self.filter_keep_4_heavy,
            'no_ring': self.filter_remove_ring,
            'unique_center_bond': self.filter_keep_unique_center
        }
        # fail fast on unknown names instead of after running earlier filters
        for filt_name in dihedral_filters:
            if filt_name not in available_filters:
                raise ValueError(f"Filter named {filt_name} not recognized, choices are {available_filters.keys()}")
        # get the list of dihedrals
        dihedral_list = self.bond_graph.get_dihedrals()
        print(f'Found total {len(dihedral_list)} distinct dihedrals')
        if self.avoid_angles_set:
            avoid = self.avoid_angles_set
            # a dihedral i-j-k-l contains the two angles i-j-k and j-k-l;
            # tuple() makes the lookup work for list- and tuple-shaped dihedrals
            dihedral_list = [
                d for d in dihedral_list
                if tuple(d[:3]) not in avoid and tuple(d[1:]) not in avoid
            ]
            print(f'{len(dihedral_list)} dihedrals left after filter out straight angles')
        # apply filters in the requested order
        for filt_name in dihedral_filters:
            dihedral_list = available_filters[filt_name](dihedral_list)
        return dihedral_list

    def filter_equivalent_terminals(self, dihedral_list):
        """Drop dihedrals that differ from a kept one only by equivalent terminal atoms.

        A dihedral i-j-k-l is skipped when some eq_i-j-k-eq_l has already been
        kept, where eq_i / eq_l are atoms equivalent to i / l according to
        `find_equivalent_terminal_atom_idxs`.

        Returns:
            List of (i, j, k, l) tuples sorted by (j, k, i, l).
        """
        eq_idxs = self.find_equivalent_terminal_atom_idxs()
        print(f"Filtering based on equivalent terminal atoms: {eq_idxs}")
        dihedrals_set = set()
        for i, j, k, l in dihedral_list:
            # first already-kept dihedral equivalent to this one, if any
            match = next(
                ((eq_i, eq_l)
                 for eq_i in eq_idxs[i]
                 for eq_l in eq_idxs[l]
                 if (eq_i, j, k, eq_l) in dihedrals_set),
                None,
            )
            if match is None:
                dihedrals_set.add((i, j, k, l))
            else:
                eq_i, eq_l = match
                print(f"Filter: dihedral {i}-{j}-{k}-{l} skipped because equivalent to {eq_i}-{j}-{k}-{eq_l}")
        # the set lost the ordering; restore it, keyed by center bond first
        return sorted(dihedrals_set, key=lambda d: (d[1], d[2], d[0], d[3]))

    def find_equivalent_terminal_atom_idxs(self):
        """Map every atom index to the set of atom indices equivalent to it.

        Two atoms are treated as equivalent when they have the same element,
        each has exactly one neighbor in the bond graph, and their neighbor
        collections compare equal. Every atom is equivalent to itself.
        """
        elem_list = self.m.elem
        neighbor_list = self.bond_graph
        noa = self.m.na
        equal_atom_idxs = {i: {i} for i in range(noa)}
        for i in range(noa):
            for j in range(i + 1, noa):
                if elem_list[i] != elem_list[j]:
                    continue
                if not (len(neighbor_list[i]) == len(neighbor_list[j]) == 1):
                    continue
                if neighbor_list[i] == neighbor_list[j]:
                    equal_atom_idxs[i].add(j)
                    equal_atom_idxs[j].add(i)
        return equal_atom_idxs

    def filter_keep_4_heavy(self, dihedral_list):
        """Filter dihedrals, keep only those whose four atoms are all non-hydrogen."""
        print("Filter: only keep dihedrals formed by 4 heavy atoms")
        elem_list = self.m.elem
        filtered_dihedral_list = [
            d for d in dihedral_list
            if all(elem_list[idx] != 'H' for idx in d)
        ]
        print(f"Number Left: {len(dihedral_list)} => {len(filtered_dihedral_list)}")
        return filtered_dihedral_list

    def filter_remove_ring(self, dihedral_list):
        """Filter dihedrals, remove any whose center atoms j and k share a ring.

        Returns:
            List of the kept dihedrals, each as a list [i, j, k, l].
        """
        rings = self.bond_graph.get_rings()
        print(f"Filter: Removing dihedrals that the center bond is in any of the rings {rings}")
        # for every atom, the indices of the rings it belongs to
        d_rings = collections.defaultdict(set)
        for i_ring, ring in enumerate(rings):
            for atom_idx in ring:
                d_rings[atom_idx].add(i_ring)
        filtered_dihedral_list = []
        for i, j, k, l in dihedral_list:
            # a common ring index means both center atoms sit in the same ring
            if d_rings[j] & d_rings[k]:
                print(f"Dihedral {i}-{j}-{k}-{l} skipped because bond {j}-{k} is in a ring")
            else:
                filtered_dihedral_list.append([i, j, k, l])
        print(f"Number Left: {len(dihedral_list)} => {len(filtered_dihedral_list)}")
        return filtered_dihedral_list

    def filter_keep_unique_center(self, dihedral_list):
        """Keep only one dihedral for each unique center bond.

        Dihedrals are grouped by their center bond (j, k), ignoring direction,
        and `find_best_dihedral_same_center_bond` picks one per group.
        """
        print("Filter: Keep only one dihedral for each center bond")
        filtered_dihedral_list = []
        d_center_bond = collections.defaultdict(list)
        for i, j, k, l in dihedral_list:
            center_bond = (j, k) if j < k else (k, j)
            d_center_bond[center_bond].append([i, j, k, l])
        for dihedral_candidates in d_center_bond.values():
            print(f"best dihedral among {dihedral_candidates}:")
            best_dihedral = self.find_best_dihedral_same_center_bond(dihedral_candidates)
            print(best_dihedral)
            filtered_dihedral_list.append(best_dihedral)
        print(f"Number Left: {len(dihedral_list)} => {len(filtered_dihedral_list)}")
        return filtered_dihedral_list

    @staticmethod
    def _count_connected_after_cut(bond_graph, cut_bonds, label):
        """Count the atoms connected to each terminal once its center bond is cut.

        Args:
            bond_graph: graph to work on; bonds are removed and then added back.
            cut_bonds: dict mapping each terminal atom to the (a, b) bond that
                attaches it to the center.
            label: name of the terminal position ('i' or 'l') used in the log.

        Returns:
            Dict terminal atom -> number of connected atoms. A lone candidate
            gets 0 without touching the graph, since there is nothing to rank.
        """
        if len(cut_bonds) == 1:
            return {next(iter(cut_bonds)): 0}
        # cut all candidate bonds together so one candidate cannot be reached
        # through another candidate's bond to the center
        for a, b in cut_bonds.values():
            bond_graph.remove_bond(a, b)
        n_connected = {end: len(bond_graph.get_connected_nodes(end)) for end in cut_bonds}
        print(f"n_connected for each {label}: {n_connected}")
        for a, b in cut_bonds.values():
            bond_graph.add_bond(a, b)
        return n_connected

    def find_best_dihedral_same_center_bond(self, dihedral_candidates):
        """Find the best dihedral among candidates with same center bond.

        Definition of best dihedral i-j-k-l: (From Lee-Ping)
        Temporarily disconnect all i-j bonds, then check the total number of
        connected atoms for each i. The same method applies to all candidates
        of l (disconnecting the k-l bonds).
        The dihedral with the maximum connected_i + connected_l wins; on a tie
        the earliest candidate is returned.

        Only bonds between non-hydrogen atoms are used for the connectivity
        count.

        NOTE(review): candidates must all have the same (j, k) orientation,
        while filter_keep_unique_center groups by an orientation-independent
        center bond -- confirm get_dihedrals() yields one orientation per bond.

        Returns:
            The winning candidate (an element of `dihedral_candidates`), or
            None when there are no candidates.
        """
        if len(dihedral_candidates) == 0:
            return None
        # check center bond are all the same
        _, center_j, center_k, _ = next(iter(dihedral_candidates))
        assert all(j==center_j and k==center_k for i,j,k,l in dihedral_candidates), "all candidates should share same center"
        # fresh bond graph with only heavy atoms; local, so edits do not leak
        heavy_atom_bonds = [
            [b1, b2] for b1, b2 in self.m.bonds
            if self.m.elem[b1] != 'H' and self.m.elem[b2] != 'H'
        ]
        bond_graph = BondGraph(heavy_atom_bonds)
        # rank the i candidates by cutting their i-j bonds
        i_candidates = {i for i, _, _, _ in dihedral_candidates}
        n_connected_i = self._count_connected_after_cut(
            bond_graph, {i: (i, center_j) for i in i_candidates}, 'i')
        # rank the l candidates by cutting their k-l bonds
        l_candidates = {l for _, _, _, l in dihedral_candidates}
        n_connected_l = self._count_connected_after_cut(
            bond_graph, {l: (center_k, l) for l in l_candidates}, 'l')
        # get the best dihedral
        return max(dihedral_candidates, key=lambda d: n_connected_i[d[0]] + n_connected_l[d[3]])

    def write_dihedrals(self, dihedral_list, filename):
        """Write `dihedral_list` to `filename` as JSON with an indent of 2."""
        with open(filename, 'w') as outfile:
            json.dump(dihedral_list, outfile, indent=2)

    def find_dihedral_pairs(self, pattern='ring-a-ring'):
        """Find pairs of dihedrals matching `pattern`.

        For 'ring-a-ring', every pair of rings joined by exactly one path of
        three atoms (left, center, right) yields the pair
        ((n1, left, center, right), (left, center, right, n2)), where n1 / n2
        is a neighbor of left / right inside the respective ring. Pairs that
        contain an angle from `avoid_angles_set` are ignored.

        Returns:
            Sorted list of dihedral pairs. Any other pattern gives an empty list.
        """
        res = []
        if pattern == 'ring-a-ring':
            # Find all pairs of dihedrals that two center bonds are
            # (ring-a)-ring and ring-(a-ring)
            rsets = [set(ring) for ring in self.bond_graph.get_rings()]
            # combinations() yields nothing when there are fewer than two rings
            for r1, r2 in combinations(rsets, 2):
                # find all paths between the two rings
                all_paths = self.bond_graph.find_all_paths(r1, r2)
                # we want only one path, and the path has len == 3 (exactly one bridge atom)
                if len(all_paths) != 1 or len(all_paths[0]) != 3:
                    continue
                left, center, right = all_paths[0]
                print(f"Found ring-a-ring: {r1}-{center}-{r2}")
                # both ends are rings, so the end groups are treated as
                # equivalent and an arbitrary in-ring neighbor is picked
                left_neighbor = (self.bond_graph[left] & r1).pop()
                right_neighbor = (self.bond_graph[right] & r2).pop()
                dihedral_pair = ((left_neighbor, left, center, right), (left, center, right, right_neighbor))
                # filter out straight angles
                if self.avoid_angles_set:
                    avoid_angles = {
                        (left_neighbor, left, center),
                        (left, center, right),
                        (center, right, right_neighbor),
                    } & self.avoid_angles_set
                    if avoid_angles:
                        print(f"{dihedral_pair} ignored because angle {avoid_angles} should be avoided")
                        continue
                print(f"{dihedral_pair} added")
                res.append(dihedral_pair)
        res.sort()
        return res
예제 #2
0
import os
from itertools import combinations

from forcebalance.molecule import Molecule

from bond_graph import BondGraph

def _has_single_atom_bridge(bond_graph):
    """Tell whether any two rings of `bond_graph` are joined by one 3-atom path.

    A pair of rings qualifies when `find_all_paths` reports exactly one path
    between them and that path has length 3 (exactly one bridge atom).
    """
    rings = bond_graph.get_rings()
    if len(rings) < 2:
        return False
    ring_sets = [set(ring) for ring in rings]
    # any() stops at the first qualifying ring pair, like the original break
    return any(
        len(paths) == 1 and len(paths[0]) == 3
        for paths in (
            bond_graph.find_all_paths(ring_a, ring_b)
            for ring_a, ring_b in combinations(ring_sets, 2)
        )
    )


# Scan every entry of the molecule folder, in sorted name order, and record
# the names of the molecules that contain a ring-bridge-ring motif.
mol_folder = 'processed_molecules/mol2'
mol_with_bridges = []
total_count = 0
for total_count, mol_name in enumerate(sorted(os.listdir(mol_folder)), start=1):
    molecule = Molecule(os.path.join(mol_folder, mol_name))
    if _has_single_atom_bridge(BondGraph(molecule.bonds)):
        mol_with_bridges.append(mol_name)


# Report: one matching file name per line, followed by the summary.
for bridged_name in mol_with_bridges:
    print(bridged_name)

print(f"Among {total_count}, found {len(mol_with_bridges)} molecules with bridge between rings")