예제 #1
0
    def extract_shingles(self, individual):
        """Return the set of SMILES shingles found in ``individual``.

        A shingle is the SMILES string of the bond environment around one
        atom, for every radius from 1 up to ``self.radius`` (inclusive).
        When ``self.rooted`` is true each shingle is written starting from
        the atom at the centre of its environment; otherwise RDKit is left
        free to choose the starting atom.

        :param individual: object exposing ``to_aromatic_smiles()``.
        :return: set of shingle SMILES strings (duplicates collapse).
        :raises ValueError: if the aromatic SMILES of ``individual`` cannot
            be parsed back into an RDKit molecule.
        """

        # Reloading molecule to make it aromatic
        aromatic_smiles = individual.to_aromatic_smiles()
        mol = MolFromSmiles(aromatic_smiles)
        if mol is None:
            raise ValueError(
                "could not build a molecule from aromatic SMILES: %r"
                % (aromatic_smiles,))

        shingles = set()

        # Atom indices below refer to the reloaded ``mol``, so the loop bound
        # is taken from that same molecule rather than from
        # ``individual.mol_graph``.
        for atm_idx in range(mol.GetNumAtoms()):
            # -1 tells RDKit not to root the fragment SMILES at a given atom.
            root_idx = atm_idx if self.rooted else -1

            for radius in range(1, self.radius + 1):
                bonds = AllChem.FindAtomEnvironmentOfRadiusN(
                    mol, radius, atm_idx)

                # RDKit returns an empty result when the environment cannot
                # reach this radius, so larger radii are skipped as well.
                if not bonds:
                    break

                # Collect the atoms touched by the environment's bonds.
                atoms = set()
                for bond_id in bonds:
                    bond = mol.GetBondWithIdx(bond_id)
                    atoms.update(
                        (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))

                shingles.add(Chem.rdmolfiles.MolFragmentToSmiles(
                    mol,
                    list(atoms),
                    bondsToUse=bonds,
                    isomericSmiles=False,
                    kekuleSmiles=False,
                    rootedAtAtom=root_idx,
                    canonical=True,
                    allBondsExplicit=False,
                    allHsExplicit=False))

        return shingles
예제 #2
0
def extract_shingles(smiles, level, as_list=False):
    """
    Extract the rooted shingles of the given SMILES up to the given level.

    A shingle is the SMILES string of the bond environment around one atom,
    written starting from that atom, for every radius from 1 up to ``level``
    (inclusive).
    see https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8

    :param smiles: SMILES string of the molecule to decompose.
    :param level: maximum environment radius; values below 1 yield an empty
        result.
    :param as_list: if true, return a list that keeps one entry per
        (atom, radius) pair, duplicates included; otherwise return a set.
    :return: list or set of shingle SMILES strings, depending on ``as_list``.
    :raises ValueError: if ``smiles`` cannot be parsed by RDKit.
    """

    # MolFromSmiles signals a parse failure by returning None; fail here with
    # a clear message instead of an AttributeError further down.
    mol = MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("could not parse SMILES: %r" % (smiles,))

    # Pick the container and its insertion method once, outside the loops.
    shingles = [] if as_list else set()
    store = shingles.append if as_list else shingles.add

    for atm_idx in range(mol.GetNumAtoms()):
        for radius in range(1, level + 1):
            bonds = AllChem.FindAtomEnvironmentOfRadiusN(mol, radius, atm_idx)

            # RDKit returns an empty result when the environment cannot reach
            # this radius, so larger radii are skipped as well.
            if not bonds:
                break

            # Collect the atoms touched by the environment's bonds.
            atoms = set()
            for bond_id in bonds:
                bond = mol.GetBondWithIdx(bond_id)
                atoms.update((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()))

            # Rooted shingle: the fragment SMILES starts at the centre atom.
            store(Chem.rdmolfiles.MolFragmentToSmiles(
                mol,
                list(atoms),
                bondsToUse=bonds,
                isomericSmiles=False,
                kekuleSmiles=False,
                rootedAtAtom=atm_idx,
                canonical=True,
                allBondsExplicit=False,
                allHsExplicit=False))

    return shingles