Beispiel #1
0
def test(a_mol, b_smiles, transform):
    """Check whether ``transform`` applied to ``a_mol`` yields ``b_smiles``.

    The transform is run on ``a_mol`` and every resulting product is passed
    through ``standardize``.  When none of the standardized products contains
    the wildcard token ``"[*]"`` the answer is a plain membership test of
    ``b_smiles`` among them.  Otherwise the products are turned into
    substructure queries (dummies become query atoms, degree is adjusted
    while ignoring dummies) and matched against the hydrogen-expanded
    molecule parsed from ``b_smiles``.

    Args:
        a_mol: reactant handed to ``transform.RunReactants``.
        b_smiles: expected product as a SMILES string.
        transform: object exposing ``RunReactants`` (presumably an RDKit
            reaction -- confirm against the caller).

    Returns:
        bool: True if the expected product is among the outcomes.
    """
    outcomes = transform.RunReactants([a_mol])
    if not outcomes:
        return False

    products = [standardize(prod) for prod in chain(*outcomes)]

    wildcard_flags = ["[*]" in prod for prod in products]
    if not any(wildcard_flags):
        # Fully specified products: direct comparison is sufficient.
        return b_smiles in products

    # NOTE(review): the wildcard test above treats the standardized products
    # like strings, while the calls below hand the same objects to
    # Chem.AddHs -- confirm what ``standardize`` actually returns.
    params = Chem.AdjustQueryParameters()
    params.makeDummiesQueries = True
    params.adjustDegree = True
    params.adjustDegreeFlags = Chem.ADJUST_IGNOREDUMMIES

    with_hs = [Chem.AddHs(prod) for prod in products]
    queries = [Chem.AdjustQueryProperties(prod, params) for prod in with_hs]

    target = Chem.AddHs(Chem.MolFromSmiles(b_smiles))

    hits = [target.HasSubstructMatch(query) for query in queries]
    return any(hits)
Beispiel #2
0
    def _load_template(self, path):
        """
        Loads a template molecule with 2D coordinates

        Args:
            path (str): path to the model molecule in *.sdf,
                or *.pdb format

        Raises:
            ValueError: if unsupported format is used: sdf|pdb

        Returns:
            rdkit.Chem.rdchem.Mol: RDKit representation of the template
        """
        mol = Chem.RWMol()
        extension = os.path.basename(path).split('.')[1]

        if extension == 'sdf':
            mol = Chem.MolFromMolFile(path, sanitize=True, removeHs=True)
        elif extension == 'pdb':
            mol = Chem.MolFromPDBFile(path, sanitize=True, removeHs=True)
        else:
            raise ValueError(
                'Unsupported molecule type \'{}\''.format(extension))

        p = Chem.AdjustQueryParameters()
        p.makeAtomsGeneric = True
        p.makeBondsGeneric = True

        mol = Chem.AdjustQueryProperties(mol, p)

        return mol
Beispiel #3
0
def _queryfromrequest(suffix='_query'):
    """Build a query molecule from the current request.

    The request payload (JSON body, falling back to ``request.values``) is
    searched for ``'smiles' + suffix``, ``'smarts' + suffix`` or
    ``'mol' + suffix``, in that order, and the first key present is parsed.

    While parsing, ``sys.stderr`` is temporarily replaced by an in-memory
    buffer so that messages written there can be included in the error
    response; the previous stream is always restored afterwards.

    Args:
        suffix (str): suffix appended to the payload key names.

    Returns:
        The parsed molecule, or None when none of the keys is present.

    Raises:
        InvalidUsage: if the molecule could not be parsed or sanitized.
    """
    tgt = request.get_json()
    if tgt is None:
        tgt = request.values

    # get errors on stderr: capture them only for the duration of parsing
    saved_stderr = sys.stderr
    sio = sys.stderr = StringIO()
    try:
        if 'smiles' + suffix in tgt:
            mol = Chem.MolFromSmiles(tgt.get('smiles' + suffix),
                                     sanitize=False)
            if mol is not None:
                try:
                    Chem.SanitizeMol(mol)
                except Exception:
                    # sanitization failure is reported like a parse failure
                    mol = None
        elif 'smarts' + suffix in tgt:
            mol = Chem.MolFromSmarts(tgt.get('smarts' + suffix))
        elif 'mol' + suffix in tgt:
            mol = Chem.MolFromMolBlock(tgt.get('mol' + suffix),
                                       removeHs=False)
            # a failed parse yields None; let it reach the error path below
            if mol is not None:
                mol = Chem.AdjustQueryProperties(mol)
        else:
            return None
    finally:
        sys.stderr = saved_stderr

    if mol is None:
        errm = sio.getvalue()
        # some errors leave blank lines
        errm = errm.replace('RDKit ERROR: \n', '')
        raise InvalidUsage(
            "Molecule could not be processed. Error message was:\n%s" % errm,
            status_code=411)
    return mol
Beispiel #4
0
    def refine(self):
        """Record, for each fragment, where it matches in its molecules.

        For every fragment index below ``self.N_frag`` a new list is
        appended to ``self.frag2mol_mapping``.  It receives one entry per
        molecule listed in ``self.frag2mol`` for that fragment: the atom
        indices of the first substructure match (chirality-aware, dummies
        treated as queries), or an empty list when nothing matches.
        """
        for frag_idx in range(self.N_frag):
            query = self.frag_list[frag_idx]
            owner_ids = self.frag2mol[frag_idx]

            per_mol_hits = list()
            self.frag2mol_mapping.append(per_mol_hits)

            for mol_idx in owner_ids:
                target = self.mol_list[mol_idx]

                self.qp.makeDummiesQueries = True
                target = Chem.AdjustQueryProperties(target, self.qp)
                # the fragment is re-adjusted on every pass, as before
                query = Chem.AdjustQueryProperties(query, self.qp)

                hits = target.GetSubstructMatches(query, useChirality=True)
                first_hit = list(hits[0]) if len(hits) > 0 else list()
                per_mol_hits.append(first_hit)
Beispiel #5
0
    def add_frag_list(self, frag_list, mol):
        """Register a molecule and its fragments in the library.

        The molecule is appended to ``self.mol_list``.  Each fragment is
        compared (chirality-aware, dummies not treated as queries) against
        the fragments already stored; unknown fragments are appended to
        ``self.frag_list``.  The cross references ``self.frag2mol`` and
        ``self.mol2frag`` and their running maximum lengths are updated.

        Args:
            frag_list: fragments of ``mol``; when empty, the molecule itself
                is used as its only fragment.
            mol: the molecule the fragments belong to.
        """
        self.mol_list.append(mol)
        self.mol2frag.append(list())
        mol_idx = self.N_mol
        self.N_mol += 1

        if len(frag_list) == 0:
            frag_list = [mol]

        for frag in frag_list:

            for known_idx, known_frag in enumerate(self.frag_list):
                self.qp.makeDummiesQueries = False
                known_frag = Chem.AdjustQueryProperties(known_frag, self.qp)
                frag = Chem.AdjustQueryProperties(frag, self.qp)
                if are_mol_same(known_frag, frag, useChirality=True):
                    ### Fragment is already part of the database
                    frag_idx = known_idx
                    break
            else:
                ### No stored fragment matched: register a new one
                self.frag_list.append(frag)
                self.frag2mol.append(list())
                frag_idx = self.N_frag
                self.N_frag += 1

            owners = self.frag2mol[frag_idx]
            if mol_idx not in owners:
                owners.append(mol_idx)
                self.max_frag2mol = max(self.max_frag2mol, len(owners))

            members = self.mol2frag[mol_idx]
            if frag_idx not in members:
                members.append(frag_idx)
                self.max_mol2frag = max(self.max_mol2frag, len(members))
def flatten_tartrate_mol(m):
    """Remove the stereo tags from tartrate/tartaric acid matches in ``m``.

    The pattern ``OC(=O)C(O)C(O)C(=O)O`` is matched with exact-degree
    queries on every atom.  For each match the chiral tags of pattern
    atoms 3 and 5 (the two hydroxyl-bearing carbons) are cleared.

    Returns:
        ``m`` itself when nothing matches, otherwise a modified copy.
    """
    # make sure we only match free tartrate/tartaric acid fragments:
    # the degree of every atom, none ignored, must equal the pattern's
    qparams = Chem.AdjustQueryParameters.NoAdjustments()
    qparams.adjustDegree = True
    qparams.adjustDegreeFlags = Chem.AdjustQueryWhichFlags.ADJUST_IGNORENONE
    pattern = Chem.AdjustQueryProperties(
        Chem.MolFromSmarts('OC(=O)C(O)C(O)C(=O)O'), qparams)

    hits = m.GetSubstructMatches(pattern)
    if not hits:
        return m

    flattened = Chem.Mol(m)
    for hit in hits:
        for pattern_pos in (3, 5):
            center = flattened.GetAtomWithIdx(hit[pattern_pos])
            center.SetChiralTag(Chem.ChiralType.CHI_UNSPECIFIED)
    return flattened
Beispiel #7
0
    def _get_ligands(self):
        """Load all ligands with a non-null molecule from the database.

        The binary ``molecule`` column is converted into RDKit molecules and
        a ``pattern`` column is added holding a generic query version of
        each molecule (generic atoms and bonds, ring count adjusted).

        Returns:
            pandas.DataFrame: the query result with the converted
            ``molecule`` column and the additional ``pattern`` column.
        """
        query = """
                select pdbid, mol_send(molecule) as molecule, atoms, rings, aromatic_rings, weight
                from {ligands}
                where molecule is not null
        """.format(ligands=PopulateLigandsScript.LIGANDS_DB)
        ligands_df = pd.read_sql_query(query, self.conn)

        generic = Chem.AdjustQueryParameters()
        generic.makeAtomsGeneric = True
        generic.makeBondsGeneric = True
        generic.adjustRingCount = True

        def from_binary(blob):
            # the column values expose tobytes(); Chem.Mol accepts the bytes
            return Chem.Mol(blob.tobytes())

        def to_pattern(mol):
            return Chem.AdjustQueryProperties(mol, generic)

        ligands_df.loc[:, "molecule"] = \
            ligands_df.loc[:, "molecule"].apply(from_binary)
        ligands_df.loc[:, "pattern"] = \
            ligands_df.loc[:, "molecule"].apply(to_pattern)

        return ligands_df
Beispiel #8
0
 def query_core(self):
     """Return ``self.core`` as a query with dummy atoms made queries.

     Returns None when ``self.core`` is falsy.
     """
     if not self.core:
         return None
     params = Chem.AdjustQueryParameters.NoAdjustments()
     params.makeDummiesQueries = True
     return Chem.AdjustQueryProperties(self.core, params)
Beispiel #9
0
def decomposition(gdatarec_lib,
                  gdata_lib,
                  mode,
                  parms=6,
                  pairs=True,
                  parmsfile=None,
                  frag_file=None,
                  map_file=None,
                  radiusadd=[0., 3.],
                  softness=1.,
                  softcut=2.,
                  pairfile=None,
                  exclude=None,
                  paircut=0.0,
                  prefix=None,
                  scaling=2.0,
                  verbose=False):

    if verbose:
        print "Start mapout procedure with"
        print "mode      = %d" % mode
        print "softness  = %6.3f" % softness
        print "softcut   = %6.3f" % softcut
        print "parmsfile = %s" % parmsfile

    if verbose:
        print "Organizing and preparing data ..."

    mode_dict = dict()
    mode_dict = {
        0: mode0,
        1: mode1,
        3: mode3,
        4: mode4,
        5: mode5,
        6: mode6,
        7: mode7
    }

    if mode in mode_dict.keys():
        fitmode = mode_dict[mode]
    else:
        mode_error(mode)

    has_cplxlig = True
    if mode in [0, 1]:
        has_cplxlig = False

    fitter = fitmode(gdatarec_lib,
                     gdata_lib,
                     parms=parms,
                     pairs=False,
                     radiusadd=radiusadd,
                     softness=softness,
                     softcut=softcut,
                     scaling=scaling,
                     verbose=verbose)

    parmdict = read_parmsfile(parmsfile)

    ### Find position of SES in parms file
    A_SSE = -1
    B_SSE = -1
    for i, entry in enumerate(parmdict["header"]):
        if entry.startswith("SSE"):
            if entry.endswith("(A)"):
                A_SSE = i
            elif entry.endswith("(B)"):
                B_SSE = i

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Find the best Candidate Solutions ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###

    ### Collect all the solution candiates
    N_entries = len(parmdict.keys()) - 1

    A_list = list()
    B_list = list()
    x_list = list()
    A_list_tmp = list()
    B_list_tmp = list()
    x_list_tmp = list()

    for key, value in parmdict.items():
        if key == "header":
            continue
        A_list_tmp.append(value[A_SSE])
        B_list_tmp.append(value[B_SSE])
        x_list_tmp.append(value[:fitter._parms])

    if fitter.decomp:
        N_entries = N_entries / 2
        for i in range(N_entries):
            A_list.append([
                copy.copy(A_list_tmp[2 * i]),
                copy.copy(A_list_tmp[2 * i + 1])
            ])
            B_list.append([
                copy.copy(B_list_tmp[2 * i]),
                copy.copy(B_list_tmp[2 * i + 1])
            ])
            x_list.append(copy.copy(x_list_tmp[2 * i]))
    else:
        A_list = copy.copy(A_list_tmp)
        B_list = copy.copy(B_list_tmp)
        x_list = copy.copy(x_list_tmp)

    A_list = np.array(A_list)
    B_list = np.array(B_list)

    ### Find the best candidate solution
    if fitter.decomp:
        ndf, dl, dc, ndr = pygmo.fast_non_dominated_sorting(A_list)
        ordered_ndf = list()
        for front in ndf:
            ordered_ndf.append(pygmo.sort_population_mo(A_list[front]))

    else:
        ordered_ndf = np.argsort(A_list, axis=0)

    if fitter.decomp:
        best_x_A = np.array(x_list[ordered_ndf[0][0]])
    else:
        best_x_A = np.array(x_list[ordered_ndf[0]])

    ### ~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Prepare Exclusion List ###
    ### ~~~~~~~~~~~~~~~~~~~~~~ ###

    if exclude != None \
    and exclude != "":
        exclude_list = list()
        with open(exclude, "r") as fopen:
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                for s in l:
                    exclude_list.append(s)
    else:
        exclude_list = list()

    ### ~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Prepare Pairise Fitting ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~ ###

    if pairs:
        if pairfile != None \
        and pairfile != "":
            if type(pairfile) != str:
                raise TypeError(
                    "The path to pairfile must be of type str, but is of type %s"
                    % type(pairfile))
            pairlist = read_pairsfile(pairfile, paircut)

            pairlist_idx = list()
            for pair in pairlist:
                for i in range(fitter.N_case):
                    case1 = fitter.select[i]
                    name1 = fitter.name[case1]
                    if name1 in exclude_list:
                        continue
                    for j in range(fitter.N_case):
                        if j <= i:
                            continue
                        case2 = fitter.select[j]
                        name2 = fitter.name[case2]
                        if name2 in exclude_list:
                            continue
                        if name1==pair[0] \
                        and name2==pair[1]:
                            pairlist_idx.append([case1, case2])
                        elif name1==pair[1] \
                        and name2==pair[0]:
                            pairlist_idx.append([case2, case1])

        else:
            pairlist = None
            pairlist_idx = list()
            for i in range(fitter.N_case):
                name1 = fitter.name[i]
                if name1 in exclude_list:
                    continue
                for j in range(fitter.N_case):
                    if j <= i:
                        continue
                    name2 = fitter.name[j]
                    if name2 in exclude_list:
                        continue
                    pairlist_idx.append([i, j])

    else:
        pairlist = None
        pairlist_idx = None

    ### ~~~~~~~~~~~~~~~~~ ###
    ### Build the Library ###
    ### ~~~~~~~~~~~~~~~~~ ###

    has_extlib = False
    ### Check for external mapping files
    if frag_file != None \
    and frag_file != "":
        has_extlib = True
        ext_frag = list()
        ext_frag_name = list()
        with open(frag_file, "r") as fopen:
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                ext_frag.append(Chem.MolFromSmiles(l[1]))
                ext_frag_name.append(l[0])
    else:
        ext_frag = None
        ext_frag_name = None

    if map_file != None \
    and map_file != "":
        ext_map_frag = list()
        ext_map_inds = list()
        ext_map_name = list()
        with open(map_file, "r") as fopen:
            for line in fopen:
                l = line.rstrip().lstrip().split()
                if len(l) == 0:
                    continue
                if l[0].startswith("#"):
                    continue
                ext_map_name.append(l[0])
                ext_map_frag.append(list())
                ext_map_inds.append(list())
                ids_list = l[1].split(",")
                if len(ids_list) == 1:
                    if ids_list[0] == "-1":
                        continue
                for i in ids_list:
                    ext_map_frag[-1].append(int(i))
                for s in l[2:]:
                    ext_map_inds[-1].append(list())
                    for i in s.split(","):
                        ext_map_inds[-1][-1].append(int(i))
    else:
        ext_map_frag = None
        ext_map_inds = None
        ext_map_name = None


    if ext_frag==None \
    and ext_map_frag!=None:
        raise IOError("Must provide both, frag_file and map_file.")

    if ext_frag!=None \
    and ext_map_frag==None:
        raise IOError("Must provide both, frag_file and map_file.")

    if has_extlib:
        mol2extmol = list()
        #frag2extfrag = list()
        if has_cplxlig:
            mol2extmol_cplx = list()
            #frag2extfrag_cplx = list()
            mol2extmol_lig = list()
            #frag2extfrag_lig  = list()

    if verbose:
        "Starting fragment decomposition..."
    RAND = np.random.randint(9999)
    frag_lib = frag_library()
    if has_cplxlig:
        frag_lib_cplx = frag_library()
        frag_lib_lig = frag_library()
    progs = aux_progs(verbose)
    for case in range(fitter.N_case):

        valid_poses = np.where(fitter.ind_case == case)[0]
        name = fitter.name[case]

        for pose in valid_poses:
            pmd_instance = fitter.pdat[pose]

            pmd_instance.save("p%d.mol2" % RAND)

            args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % (
                RAND, RAND)
            progs.call(progs.ante_exe, args)

            mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND, removeHs=False)

            if verbose:
                AllChem.Compute2DCoords(mol)

            if has_extlib:
                index = ext_map_name.index(name)
                frag_list = list()
                for frag_id in ext_map_frag[index]:
                    frag_list.append(ext_frag[frag_id])
                    ### If we have an external library with mappings
                    ### we must do the refinement manually!
                mol2extmol.append(index)
            else:
                frag_list = get_frag_list(mol)

            frag_lib.add_frag_list(frag_list, mol)

            os.remove("p%d.mol2" % RAND)
            os.remove("p%d_sybyl.mol2" % RAND)

        if has_cplxlig:

            valid_poses_cplx = np.where(fitter.ind_case_cplx == case)[0]
            valid_poses_lig = np.where(fitter.ind_case_lig == case)[0]

            for pose in valid_poses_cplx:
                pmd_instance = fitter.pdat_cplx[pose]

                pmd_instance.save("p%d.mol2" % RAND)

                args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % (
                    RAND, RAND)
                progs.call(progs.ante_exe, args)

                mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND,
                                           removeHs=False)

                if verbose:
                    AllChem.Compute2DCoords(mol)

                if has_extlib:
                    index = ext_map_name.index(name)
                    frag_list = list()
                    for frag_id in ext_map_frag[index]:
                        frag_list.append(ext_frag[frag_id])
                        ### If we have an external library with mappings
                        ### we must do the refinement manually!
                    mol2extmol_cplx.append(index)
                else:
                    frag_list = get_frag_list(mol)

                frag_lib_cplx.add_frag_list(frag_list, mol)

                os.remove("p%d.mol2" % RAND)
                os.remove("p%d_sybyl.mol2" % RAND)

            for pose in valid_poses_lig:
                pmd_instance = fitter.pdat_lig[pose]

                pmd_instance.save("p%d.mol2" % RAND)

                args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % (
                    RAND, RAND)
                progs.call(progs.ante_exe, args)

                mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND,
                                           removeHs=False)

                if verbose:
                    AllChem.Compute2DCoords(mol)

                if has_extlib:
                    index = ext_map_name.index(name)
                    frag_list = list()
                    for frag_id in ext_map_frag[index]:
                        frag_list.append(ext_frag[frag_id])
                        ### If we have an external library with mappings
                        ### we must do the refinement manually!
                    mol2extmol_lig.append(index)
                else:
                    frag_list = get_frag_list(mol)

                frag_lib_lig.add_frag_list(frag_list, mol)

                os.remove("p%d.mol2" % RAND)
                os.remove("p%d_sybyl.mol2" % RAND)

    if has_extlib:
        for frag_id in range(frag_lib.N_frag):
            frag_lib.frag2mol_mapping.append(list())
            for mol_id in frag_lib.frag2mol[frag_id]:

                frag_id_rank = frag_lib.mol2frag[mol_id].index(frag_id)
                ext_mol_id = mol2extmol[mol_id]

                if len(ext_map_inds[ext_mol_id]) == 0:
                    ### If we are here, then the molecule has no fragments.
                    ### The molecule is then treated, as if itself would
                    ### be the fragment
                    mol = frag_lib.mol_list[mol_id]
                    matches = range(mol.GetNumAtoms())
                else:
                    matches = ext_map_inds[ext_mol_id][frag_id_rank]

                frag_lib.frag2mol_mapping[-1].append(matches)

        if has_cplxlig:
            for frag_id in range(frag_lib_cplx.N_frag):
                frag_lib_cplx.frag2mol_mapping.append(list())
                for mol_id in frag_lib_cplx.frag2mol[frag_id]:

                    frag_id_rank = frag_lib_cplx.mol2frag[mol_id].index(
                        frag_id)
                    ext_mol_id = mol2extmol_cplx[mol_id]

                    if len(ext_map_inds[ext_mol_id]) == 0:
                        ### If we are here, then the molecule has no fragments.
                        ### The molecule is then treated, as if itself would
                        ### be the fragment
                        mol = frag_lib_cplx.mol_list[mol_id]
                        matches = range(mol.GetNumAtoms())
                    else:
                        matches = ext_map_inds[ext_mol_id][frag_id_rank]

                    frag_lib_cplx.frag2mol_mapping[-1].append(matches)

            for frag_id in range(frag_lib_lig.N_frag):
                frag_lib_lig.frag2mol_mapping.append(list())
                for mol_id in frag_lib_lig.frag2mol[frag_id]:

                    frag_id_rank = frag_lib_lig.mol2frag[mol_id].index(frag_id)
                    ext_mol_id = mol2extmol_lig[mol_id]

                    if len(ext_map_inds[ext_mol_id]) == 0:
                        ### If we are here, then the molecule has no fragments.
                        ### The molecule is then treated, as if itself would
                        ### be the fragment
                        mol = frag_lib_lig.mol_list[mol_id]
                        matches = range(mol.GetNumAtoms())
                    else:
                        matches = ext_map_inds[ext_mol_id][frag_id_rank]

                    frag_lib_lig.frag2mol_mapping[-1].append(matches)

    else:
        frag_lib.refine()

        if has_cplxlig:
            frag_lib_cplx.refine()
            frag_lib_lig.refine()

    if verbose:
        print "Poses Fragments..."
        for case in range(fitter.N_case):
            name = fitter.name[case]
            valid_poses = np.where(fitter.ind_case == case)[0]
            print name,
            for pose in valid_poses:
                print frag_lib.mol2frag[pose],
            print ""
        frag_lib.draw("pos_")

        if has_cplxlig:
            print "Cplx Fragments..."
            for case in range(fitter.N_case):
                name = fitter.name[case]
                valid_poses = np.where(fitter.ind_case_cplx == case)[0]
                print name,
                for pose in valid_poses:
                    print frag_lib_cplx.mol2frag[pose],
                print ""
            frag_lib_cplx.draw("cplx_")

            print "Lig Fragments..."
            for case in range(fitter.N_case):
                name = fitter.name[case]
                valid_poses = np.where(fitter.ind_case_lig == case)[0]
                print name,
                for pose in valid_poses:
                    print frag_lib_lig.mol2frag[pose],
                print ""
            frag_lib_lig.draw("lig_")

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Calculate the Fragment weightings ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###

    if verbose:
        print "Calculate fragment weightings..."
    ### Constructor for weight_fitting:
    ### def __init__(self, fitter, x, frag_library, prefix=None, verbose=False):
    weight = weight_fitting(fitter, best_x_A, pairs, frag_lib, "pos", verbose)
    weight.process_rec = True
    weight.process_cplx = False
    weight.process_lig = False
    if has_cplxlig:
        weight_cplx = weight_fitting(fitter, best_x_A, pairs, frag_lib_cplx,
                                     "cplx", verbose)
        weight_cplx.process_rec = False
        weight_cplx.process_cplx = True
        weight_cplx.process_lig = False
        weight_lig = weight_fitting(fitter, best_x_A, pairs, frag_lib_lig,
                                    "lig", verbose)
        weight_lig.process_rec = False
        weight_lig.process_cplx = False
        weight_lig.process_lig = True

    ### Make the fragment-based decomposition of the GIST grids
    for case in range(fitter.N_case):
        weight.set_case(case)
        ### Use the internal write routine as a callback for the process routine
        weight.process(weight.simple_weighting)
        if has_cplxlig:
            weight_cplx.set_case(case)
            weight_lig.set_case(case)
            weight_cplx.process(weight_cplx.simple_weighting)
            weight_lig.process(weight_lig.simple_weighting)

    ### Combine the individual poses and get the final
    ### contributions of the fragments
    calc_data = np.zeros((2, fitter.N_case, frag_lib.N_frag), dtype=DOUBLE)
    frag_assign = np.zeros((fitter.N_case, frag_lib.N_frag), dtype=int)
    frag_assign[:] = -1
    if has_cplxlig:
        calc_data_cplx = np.zeros((2, fitter.N_case, frag_lib_cplx.N_frag),
                                  dtype=DOUBLE)
        frag_assign_cplx = np.zeros((fitter.N_case, frag_lib_cplx.N_frag),
                                    dtype=int)
        frag_assign_cplx[:] = -1
        calc_data_lig = np.zeros((2, fitter.N_case, frag_lib_lig.N_frag),
                                 dtype=DOUBLE)
        frag_assign_lig = np.zeros((fitter.N_case, frag_lib_lig.N_frag),
                                   dtype=int)
        frag_assign_lig[:] = -1

    for case in range(fitter.N_case):
        weight.set_case(case)
        _data, _assign = weight.combine()
        calc_data[0, case, :] = np.copy(_data[0])
        calc_data[1, case, :] = np.copy(_data[1])
        frag_assign[case, :] = np.copy(_assign)
        if has_cplxlig:

            weight_cplx.set_case(case)
            _data, _assign = weight_cplx.combine()
            calc_data_cplx[0, case, :] = np.copy(_data[0])
            calc_data_cplx[1, case, :] = np.copy(_data[1])
            frag_assign_cplx[case, :] = np.copy(_assign)

            weight_lig.set_case(case)
            _data, _assign = weight_lig.combine()
            calc_data_lig[0, case, :] = np.copy(_data[0])
            calc_data_lig[1, case, :] = np.copy(_data[1])
            frag_assign_lig[case, :] = np.copy(_assign)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    ### Evaluate the Fragment Properties ###
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###

    if has_cplxlig:

        case2frag_cplx = np.zeros((fitter.N_case, frag_lib_cplx.N_frag),
                                  dtype=int)
        case2frag_lig = np.zeros((fitter.N_case, frag_lib_lig.N_frag),
                                 dtype=int)
        case2frag_cplx[:] = -1
        case2frag_lig[:] = -1

        for case in range(fitter.N_case):

            valids = np.where(frag_assign[case] > -1)[0]
            valids_cplx = np.where(frag_assign_cplx[case] > -1)[0]
            valids_lig = np.where(frag_assign_lig[case] > -1)[0]

            for frag_id in frag_assign[case, valids]:

                frag_lib.qp.makeDummiesQueries = False
                frag_lib_cplx.qp.makeDummiesQueries = False
                frag_lib_lig.qp.makeDummiesQueries = False

                frag = Chem.AdjustQueryProperties(frag_lib.frag_list[frag_id],\
                                                    frag_lib.qp)

                for frag_id_cplx in frag_assign_cplx[case, valids_cplx]:
                    frag_cplx = Chem.AdjustQueryProperties(frag_lib_cplx.frag_list[frag_id_cplx],\
                                                            frag_lib_cplx.qp)
                    if are_mol_same(frag, frag_cplx, useChirality=True):
                        case2frag_cplx[case, frag_id_cplx] = frag_id
                        break

                for frag_id_lig in frag_assign_lig[case, valids_lig]:
                    frag_lig  = Chem.AdjustQueryProperties(frag_lib_lig.frag_list[frag_id_lig],\
                                                            frag_lib_lig.qp)
                    if are_mol_same(frag, frag_lig, useChirality=True):
                        case2frag_lig[case, frag_id_lig] = frag_id
                        break
Beispiel #10
0
def align_mol_to_frags(smi_molecule, smi_linker, smi_frags):
    """Renumber a molecule and its fragments so that their atoms line up.

    The molecule is matched against the fragments and the linker (dummy
    atoms act as queries).  The first combination of matches that covers
    all heavy atoms of the molecule is used -- or the last combination
    tried if none does -- to renumber the molecule: fragment atoms first,
    remaining linker atoms afterwards.  Dummy atoms are then moved to the
    end of the fragment numbering.

    Args:
        smi_molecule (str): SMILES of the full molecule.
        smi_linker (str): SMILES of the linker, including dummy atoms.
        smi_frags (str): SMILES of the fragments, including dummy atoms.

    Returns:
        tuple: ``((aligned_mol, aligned_frags), nodes_to_keep,
        exit_vectors)``.  On any error "Could not align" is printed and
        ``(([], []), [], [])`` is returned instead.
    """
    try:
        # Load SMILES as molecules
        mol = Chem.MolFromSmiles(smi_molecule)
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)
        # Include dummy atoms in query
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Renumber molecule based on frags (incl. dummy atoms)
        aligned_mols = []

        sub_idx = []
        # Get matches to fragments and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(mol.GetSubstructMatches(qlinker, uniquify=False))

        # Loop over matches; without any match the names used below stay
        # undefined and the resulting NameError is reported as a failure
        for frag_match, linker_match in product(frags_matches, linker_matches):
            # Check if match (dummy atoms are not counted)
            f_match = [
                idx for num, idx in enumerate(frag_match)
                if frags.GetAtomWithIdx(num).GetAtomicNum() != 0
            ]
            l_match = [
                idx for num, idx in enumerate(linker_match)
                if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
                and idx not in f_match
            ]
            # If perfect match, break
            if len(set(list(f_match) +
                       list(l_match))) == mol.GetNumHeavyAtoms():
                break
        # Add frag indices
        sub_idx += frag_match
        # Add linker indices to end
        sub_idx += [
            idx for num, idx in enumerate(linker_match)
            if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
            and idx not in sub_idx
        ]

        aligned_mols.append(Chem.rdmolops.RenumberAtoms(mol, sub_idx))
        aligned_mols.append(frags)

        nodes_to_keep = list(range(len(frag_match)))

        # Renumber dummy atoms to end
        dummy_idx = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                dummy_idx.append(atom.GetIdx())
        for i, mol in enumerate(aligned_mols):
            # NOTE(review): "+ 2" presumably accounts for exactly two dummy
            # atoms in the fragments -- confirm
            sub_idx = list(range(aligned_mols[1].GetNumHeavyAtoms() + 2))
            for idx in dummy_idx:
                sub_idx.remove(idx)
                sub_idx.append(idx)
            if i == 0:
                mol_range = list(range(mol.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(mol.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(sub_idx)))
            sub_idx.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(mol, sub_idx)

        # Get exit vectors: the neighbours of the dummy atoms
        exit_vectors = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                if atom.GetIdx() in nodes_to_keep:
                    nodes_to_keep.remove(atom.GetIdx())
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())

        if len(exit_vectors) != 2:
            print("Incorrect number of exit vectors")

        return (aligned_mols[0], aligned_mols[1]), nodes_to_keep, exit_vectors

    except Exception:
        # Best effort: ordinary failures yield the empty result, while
        # KeyboardInterrupt/SystemExit are no longer swallowed
        print("Could not align")
        return ([], []), [], []
def compute_distance_and_angle(mol, smi_linker, smi_frags):
    """Measure the distance and angle between the two exit vectors of a molecule.

    The molecule is renumbered so that its atoms line up with the fragment
    SMILES (dummy atoms last); coordinates are then read from the molecule's
    conformer at the indices of the fragment exit atoms and dummy atoms.

    Args:
        mol: RDKit molecule with a conformer, made up of the fragments plus
            the linker.
        smi_linker (str): SMILES of the linker, with dummy atoms marking the
            attachment points.
        smi_frags (str): SMILES of the starting fragments, with dummy atoms
            marking the attachment points.

    Returns:
        tuple: ``(distance, angle)`` where ``distance`` is the Euclidean
        distance between the two exit atoms and ``angle`` is the angle (in
        radians) between the two exit-atom -> dummy-position vectors.
        ``(None, None)`` is returned if anything goes wrong; the inputs are
        printed in that case.
    """
    try:
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)

        # Dummy atoms must behave as wildcards in the substructure queries
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Align to frags and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(mol.GetSubstructMatches(qlinker, uniquify=False))

        # Search for a (fragments, linker) pair of matches that together
        # cover every heavy atom; if none does, the last pair tried is used.
        n_heavy = mol.GetNumHeavyAtoms()
        frag_match = linker_match = None
        for frag_match, linker_match in product(frags_matches, linker_matches):
            f_match = [
                idx for num, idx in enumerate(frag_match)
                if frags.GetAtomWithIdx(num).GetAtomicNum() != 0
            ]
            l_match = [
                idx for num, idx in enumerate(linker_match)
                if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
                and idx not in f_match
            ]
            if len(set(f_match + l_match)) == n_heavy:
                break
        if frag_match is None:
            # Explicit failure instead of relying on an unbound variable
            raise ValueError("fragments or linker not found in molecule")

        # New atom order: fragment atoms first, then remaining linker atoms
        new_order = list(frag_match)
        new_order += [
            idx for num, idx in enumerate(linker_match)
            if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
            and idx not in frag_match
        ]

        aligned_mols = [Chem.rdmolops.RenumberAtoms(mol, new_order), frags]

        # Renumber dummy atoms to end
        dummy_idx = [
            atom.GetIdx() for atom in frags.GetAtoms()
            if atom.GetAtomicNum() == 0
        ]
        # NOTE: the loop variable deliberately does not reuse the name `mol`,
        # so the failure handler below always reports the input molecule.
        for i, aligned in enumerate(aligned_mols):
            # "+ 2" accounts for the two dummy atoms of the fragments
            order = list(range(frags.GetNumHeavyAtoms() + 2))
            for idx in dummy_idx:
                order.remove(idx)
                order.append(idx)
            if i == 0:
                mol_range = list(range(aligned.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(aligned.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(order)))
            order.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(aligned, order)

        # Get exit vectors (neighbours of dummies) and the dummy positions
        exit_vectors = []
        linker_atom_idx = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())
                linker_atom_idx.append(atom.GetIdx())

        # Get coords from the aligned full molecule
        conf = aligned_mols[0].GetConformer()
        exit_coords = [
            np.array(conf.GetAtomPosition(idx)) for idx in exit_vectors
        ]
        linker_coords = [
            np.array(conf.GetAtomPosition(idx)) for idx in linker_atom_idx
        ]

        # Get angle
        v1_u = unit_vector(linker_coords[0] - exit_coords[0])
        v2_u = unit_vector(linker_coords[1] - exit_coords[1])
        angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

        # Get distance
        distance = np.linalg.norm(exit_coords[0] - exit_coords[1])

        return distance, angle

    except Exception:
        print(Chem.MolToSmiles(mol), smi_linker, smi_frags)
        return None, None
def get_linker(full_mol, clean_frag, starting_point):
    """Extract the linker that joins the starting fragments inside a molecule.

    Args:
        full_mol: RDKit molecule of the complete compound. It is kekulized
            in place (aromatic flags cleared) by this function.
        clean_frag: RDKit molecule of the fragments, used as the
            substructure query against ``full_mol``.
        starting_point (str): SMILES of the starting fragments, separated
            by ``'.'``.

    Returns:
        str: SMILES of the linker. An empty string is returned when the
        fragments do not match, when there are no linker atoms, or when no
        candidate linker could be produced.
    """
    frag_matches = list(full_mol.GetSubstructMatches(clean_frag))

    # Nothing to do without a match
    if not frag_matches:
        print("No matches")
        return ""

    # Number of heavy atoms the linker must contain
    linker_len = full_mol.GetNumHeavyAtoms() - clean_frag.GetNumHeavyAtoms()
    if linker_len == 0:
        return ""

    # Keep an untouched copy for fragmentation before kekulizing the input
    mol_to_break = Chem.Mol(full_mol)
    Chem.Kekulize(full_mol, clearAromaticFlags=True)

    poss_linker = []

    for match in frag_matches:
        mol_rw = Chem.RWMol(full_mol)
        # Every atom that is not part of the fragment match is a linker atom
        linker_atoms = list(
            set(list(range(full_mol.GetNumHeavyAtoms()))).difference(match))
        linker_bonds = []
        atoms_joined_to_linker = []

        # Record (i) bonds between fragments and linker and (ii) the
        # fragment atoms those bonds start from
        for frag_idx in sorted(match, reverse=True):
            neighbours = [
                x.GetIdx()
                for x in mol_rw.GetAtomWithIdx(frag_idx).GetNeighbors()
            ]
            shared = set(neighbours).intersection(set(linker_atoms))
            for nbr_idx in list(shared):
                linker_bonds.append(
                    mol_rw.GetBondBetweenAtoms(frag_idx, nbr_idx).GetIdx())
                atoms_joined_to_linker.append(frag_idx)

        # Exactly two fragment atoms must touch the linker; else next match
        if len(set(atoms_joined_to_linker)) != 2:
            continue

        # Strip the fragment atoms, leaving the candidate linker
        for frag_idx in sorted(match, reverse=True):
            mol_rw.RemoveAtom(frag_idx)
        linker = Chem.Mol(mol_rw)

        # Candidate must have the required number of atoms
        if linker.GetNumHeavyAtoms() != linker_len:
            continue

        # Strip the linker atoms instead, leaving the fragments
        mol_rw = Chem.RWMol(full_mol)
        for link_idx in sorted(linker_atoms, reverse=True):
            mol_rw.RemoveAtom(link_idx)
        frags = Chem.Mol(mol_rw)

        # There must be two disconnected fragments
        if len(Chem.rdmolops.GetMolFrags(frags)) != 2:
            continue

        # Fragment molecule into starting fragments and linker
        fragmented_mol = Chem.FragmentOnBonds(mol_to_break, linker_bonds)

        # Remove starting fragments from fragmentation
        linker_to_return = Chem.Mol(fragmented_mol)
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True
        for f in starting_point.split('.'):
            qfrag = Chem.AdjustQueryProperties(Chem.MolFromSmiles(f), qp)
            linker_to_return = AllChem.DeleteSubstructs(linker_to_return,
                                                        qfrag,
                                                        onlyFrags=True)

        # Complex case: linker is disconnected or not bonded exactly twice
        # to the rest of the molecule -> only record it as a possibility
        if len(Chem.rdmolops.GetMolFrags(linker)) != 1 or len(
                linker_bonds) != 2:
            fragmented_mol = Chem.MolFromSmiles(
                Chem.MolToSmiles(fragmented_mol), sanitize=False)
            linker_to_return = AllChem.DeleteSubstructs(
                fragmented_mol, Chem.MolFromSmiles(starting_point))
            poss_linker.append(Chem.MolToSmiles(linker_to_return))
            continue

        Chem.Kekulize(linker_to_return, clearAromaticFlags=True)
        # Occasionally a starting fragment survives the deletion above; in
        # that case pick the piece with the expected linker size
        if len(Chem.rdmolops.GetMolFrags(linker_to_return)) > 1:
            for frag in Chem.MolToSmiles(linker_to_return).split('.'):
                if Chem.MolFromSmiles(frag).GetNumHeavyAtoms() == linker_len:
                    return frag
        # Round-trip through SMILES before returning
        return Chem.MolToSmiles(
            Chem.MolFromSmiles(Chem.MolToSmiles(linker_to_return)))

    # No possibilities: process failed
    if not poss_linker:
        print("FAIL:", Chem.MolToSmiles(full_mol),
              Chem.MolToSmiles(clean_frag), starting_point)
        return ""
    # Multiple possibilities: process probably failed, report them
    if len(poss_linker) > 1:
        print("More than one poss linker. ", poss_linker)
    return poss_linker[0]
def _dummy_attachments(combo, atom_indices):
    """Return ([(dummy_idx, neighbour_idx)], [neighbour_idx]) for dummy atoms."""
    dummy_bonds_at = []
    exit_points = []
    for idx in atom_indices:
        atom = combo.GetAtomWithIdx(idx)
        if atom.GetAtomicNum() == 0:
            nbr_idx = atom.GetNeighbors()[0].GetIdx()
            dummy_bonds_at.append((idx, nbr_idx))
            exit_points.append(nbr_idx)
    return dummy_bonds_at, exit_points


def _bond_exit_points(combo, linker_exits, st_pt_exits, bonds_to_break,
                      linker, st_pt):
    """Bond paired exit points, cut the dummy bonds, return the longest SMILES piece."""
    editable = Chem.EditableMol(combo)
    for atom_1, atom_2 in zip(linker_exits, st_pt_exits):
        if atom_1 == atom_2:
            # Degenerate pairing: report the inputs and stop adding bonds
            print(linker, st_pt)
            break
        editable.AddBond(atom_1, atom_2, order=Chem.rdchem.BondType.SINGLE)

    for dummy_idx, nbr_idx in sorted(bonds_to_break, reverse=True):
        editable.RemoveBond(dummy_idx, nbr_idx)

    # The detached dummy atoms become separate pieces; keep the longest
    # SMILES string (first one on ties)
    return max(Chem.MolToSmiles(editable.GetMol()).split('.'), key=len)


def join_frag_linker(linker, st_pt, random_join=True):
    """Join a linker to the starting fragments at their dummy-atom exit points.

    Args:
        linker (str): SMILES of the linker with dummy atoms marking its
            attachment points. An empty string means "no linker".
        st_pt (str): SMILES of the starting fragments with dummy atoms
            marking their attachment points.
        random_join (bool): If True, the fragment exit points are shuffled
            with ``np.random.shuffle`` and one joined molecule is returned.
            If False, both pairings (given order and reversed order) are
            built.

    Returns:
        str or list[str]: With an empty ``linker``, the first '.'-separated
        piece of ``st_pt`` with dummy atoms replaced by hydrogens. Otherwise
        the SMILES of the joined molecule (``random_join=True``) or a list of
        the two SMILES for the two pairings (``random_join=False``).
    """
    du = Chem.MolFromSmiles('*')

    if linker == "":
        capped = AllChem.ReplaceSubstructs(Chem.MolFromSmiles(st_pt), du,
                                           Chem.MolFromSmiles('[H]'),
                                           True)[0]
        return Chem.MolToSmiles(Chem.RemoveHs(capped)).split('.')[0]

    combo = Chem.CombineMols(Chem.MolFromSmiles(linker),
                             Chem.MolFromSmiles(st_pt))

    # Include dummy in query
    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True

    qlink = Chem.AdjustQueryProperties(Chem.MolFromSmiles(linker), qp)
    linker_matches = combo.GetSubstructMatches(qlink)
    if len(linker_matches) > 1:
        # Prefer the match that covers exactly two dummy atoms; if none
        # does, the last match is used
        for linker_atoms in linker_matches:
            n_dummy = sum(
                1 for a in linker_atoms
                if combo.GetAtomWithIdx(a).GetAtomicNum() == 0)
            if n_dummy == 2:
                break
    else:
        linker_atoms = linker_matches[0]

    linker_dummy_bonds_at, linker_exit_points = _dummy_attachments(
        combo, linker_atoms)

    # Everything that is not part of the linker match belongs to the
    # starting fragments
    st_pt_atoms = list(
        set(range(combo.GetNumAtoms())).difference(linker_atoms))
    st_pt_dummy_bonds_at, st_pt_exit_points = _dummy_attachments(
        combo, st_pt_atoms)

    bonds_to_break = linker_dummy_bonds_at + st_pt_dummy_bonds_at

    if random_join:
        np.random.shuffle(st_pt_exit_points)
        return _bond_exit_points(combo, linker_exit_points,
                                 st_pt_exit_points, bonds_to_break, linker,
                                 st_pt)

    return [
        _bond_exit_points(combo, linker_exit_points, st_pt_exit_pts,
                          bonds_to_break, linker, st_pt)
        for st_pt_exit_pts in (st_pt_exit_points, st_pt_exit_points[::-1])
    ]