Esempio n. 1
0
def compute_similarity(mref, molecules, fp_type="DL"):
    """Yield molecule properties scored against a reference molecule.

    The fingerprint of ``mref`` is computed once with ``fps.fingerprint`` and
    handed to ``ml.molec_properties`` for every truthy entry of ``molecules``.

    Args:
        mref: Reference molecule passed to ``fps.fingerprint``.
        molecules: Iterable of molecules; falsy entries are reported on
            stdout with their position and skipped.
        fp_type: Fingerprint type forwarded to both helpers.

    Yields:
        The value returned by ``ml.molec_properties`` for each valid molecule.
    """
    reference_fp = fps.fingerprint(mref, fp_type)
    for position, candidate in enumerate(molecules):
        if not candidate:
            # Report the index of the entry that could not be used.
            print("Molecule {}".format(position))
            continue
        yield ml.molec_properties(candidate,
                                  fp_ref=reference_fp,
                                  fp_type=fp_type)
Esempio n. 2
0
def combi_substructure_search(sdfs, molecules_db):
    """Yield properties of database molecules matching every SDF query file.

    A database molecule is kept when, for each file in ``sdfs``, it contains
    at least one of the molecules stored in that file as a substructure
    (``HasSubstructMatch`` with ``useChirality=True``).

    Args:
        sdfs: Iterable of SDF file paths readable by ``Chem.SDMolSupplier``.
        molecules_db: Iterable of molecules to screen; falsy entries are
            reported on stdout with their position and skipped.

    Yields:
        The value returned by ``ml.molec_properties(m)`` for each molecule
        that matches all query files.
    """
    print("Searching for substructure")
    # Parse every SDF file once instead of re-reading it for each database
    # molecule. Records the supplier returns as None (unparsable entries)
    # cannot be used as a substructure query, so they are dropped here.
    query_sets = [
        [m_ref for m_ref in Chem.SDMolSupplier(sdf) if m_ref is not None]
        for sdf in sdfs
    ]
    for i, m in tqdm(enumerate(molecules_db)):
        if not m:
            print("Skipping {}".format(i))
            continue
        # any() stops at the first hit inside a file; all() stops at the
        # first file without a hit.
        if all(
                any(
                    m.HasSubstructMatch(m_ref, useChirality=True)
                    for m_ref in queries) for queries in query_sets):
            yield ml.molec_properties(m)
Esempio n. 3
0
def search_substructure(molecule_query, molecules_db):
    """Yield properties of database molecules containing every query.

    Args:
        molecule_query: Iterable of query molecules; a database molecule must
            contain all of them as substructures (``HasSubstructMatch`` with
            ``useChirality=True``).
        molecules_db: Iterable of molecules to screen; falsy entries are
            reported on stdout with their position and skipped.

    Yields:
        The value returned by ``ml.molec_properties(m)`` for each molecule
        that matches all queries.
    """
    print("Searching for substructure")
    # Materialise the queries once: a one-shot iterator would be exhausted
    # after the first database molecule, and every later molecule would then
    # pass the "all queries match" test vacuously.
    queries = list(molecule_query)
    for i, m in tqdm(enumerate(molecules_db)):
        if not m:
            print("Skipping {}".format(i))
            continue
        # all() stops at the first query that does not match.
        if all(
                m.HasSubstructMatch(m_ref, useChirality=True)
                for m_ref in queries):
            yield ml.molec_properties(m)
Esempio n. 4
0
def compute_similarity_several_mols(mrefs, molecules, fp_type="DL"):
    """Yield molecule properties scored against several reference molecules.

    Args:
        mrefs: Iterable of reference molecules; falsy entries are ignored.
        molecules: Iterable of molecules to score; falsy entries are skipped
            silently.
        fp_type: Fingerprint type forwarded to ``fps.fingerprint`` and
            ``ml.molec_properties``.

    Yields:
        The object returned by ``ml.molec_properties`` with an added
        ``similarities`` list holding one
        ``DataStructs.FingerprintSimilarity`` value per valid reference, in
        reference order.
    """
    reference_fps = []
    for reference in mrefs:
        if reference:
            reference_fps.append(fps.fingerprint(reference, fp_type=fp_type))

    for candidate in molecules:
        if not candidate:
            continue
        props = ml.molec_properties(candidate, fp_type=fp_type)
        scores = []
        for reference_fp in reference_fps:
            scores.append(
                DataStructs.FingerprintSimilarity(reference_fp, props.fp))
        props.similarities = scores
        yield props
Esempio n. 5
0
def compute_similarity_severl_fp(
        mref,
        molecules,
        fp_types=("DL", "circular", "torsions", "MACCS"),
        tresholds=(0.7, 0.4, 0.7, 0.4)):
    """Yield molecules that pass the threshold of any fingerprint type.

    Fingerprint types are tried in order for each molecule; the molecule is
    yielded once, for the first type whose similarity to ``mref`` is strictly
    greater than the threshold at the same position.

    Args:
        mref: Reference molecule passed to ``fps.fingerprint``.
        molecules: Iterable of molecules; falsy entries are reported on
            stdout with their position and skipped.
        fp_types: Fingerprint types to try, in priority order.
        tresholds: Similarity thresholds, indexed in parallel with
            ``fp_types`` (the parameter name keeps its historical spelling
            for backward compatibility).

    Yields:
        The value returned by ``ml.molec_properties`` for the first
        fingerprint type that accepts the molecule.
    """
    # The reference fingerprint depends only on mref and the fingerprint
    # type, so each one is computed at most once and only when first needed.
    ref_fps = {}
    for i, m in tqdm(enumerate(molecules)):
        if not m:
            # Report an unusable entry once, not once per fingerprint type.
            print("Molecule {}".format(i))
            continue
        for j, fp_type in enumerate(fp_types):
            if j not in ref_fps:
                ref_fps[j] = fps.fingerprint(mref, fp_type=fp_type)
            molec = ml.molec_properties(m,
                                        fp_ref=ref_fps[j],
                                        fp_type=fp_type)
            if molec.similarity > float(tresholds[j]):
                yield molec
                break
def main(molecule_query,
         molecules_db,
         n_bins=100,
         fp_types=("DL", "MACCS", "circular"),
         test=None,
         dim_type="umap",
         n_top=100):
    """Analyse the similarity of a molecule database to a query molecule.

    For each fingerprint type the similarity histogram is accumulated over
    all molecules produced by ``sim.compute_similarity`` and written to
    ``counts_<fp_type>.txt``; unless ``test`` is truthy it is also plotted to
    ``similarity_hist_<fp_type>.png``. For the ``"circular"`` type the
    ``n_top`` most similar molecules plus the query itself are additionally
    embedded in two dimensions and, unless ``test`` is truthy, shown with
    ``pt.interactive_map``.

    Args:
        molecule_query: Reference molecule.
        molecules_db: Molecules to compare. It is handed to
            ``sim.compute_similarity`` once per fingerprint type, so it
            presumably has to be re-iterable when several types are given.
        n_bins: Number of histogram bins forwarded to ``pt.histogram``.
        fp_types: Fingerprint types to analyse.
        test: When truthy, no figure is written and no map is shown.
        dim_type: Method name forwarded to ``dm.ReduceDimension(...).run``.
        n_top: Number of most similar molecules kept for the embedding.

    Raises:
        ValueError: If ``n_top`` is smaller than 1, or if no molecule was
            available for a fingerprint type.
    """
    if n_top < 1:
        raise ValueError("n_top must be at least 1, got {}".format(n_top))

    #Analysis code
    for fp_type in fp_types:

        # Retrieve fingerprints and similarity for analysis
        counts = None
        bins = None
        svg = [None] * n_top
        fp = [None] * n_top
        similarity = [0] * n_top
        for m in tqdm(
                sim.compute_similarity(molecule_query, molecules_db, fp_type)):
            #Build similarity counts
            counts_step, bins = pt.histogram(m.similarity, n_bins=n_bins)
            counts = counts_step if counts is None else np.add(
                counts, counts_step)

            #Build fingerprints: replace the weakest kept entry when the
            #current molecule is more similar than it
            idx = np.argmin(similarity)
            if m.similarity > similarity[idx]:
                fp[idx] = m.fp
                similarity[idx] = m.similarity
                svg[idx] = m.to_svg()

        # Without any molecule there is no histogram to plot or save; fail
        # with an explicit message instead of an unbound-variable error.
        if counts is None:
            raise ValueError(
                "No valid molecules to analyse for fingerprint type {!r}; "
                "molecules_db is empty or was already consumed".format(
                    fp_type))

        #Plot similarity distribution
        if not test:
            fig, ax = plt.subplots()
            ax.hist(bins[1:], bins, weights=counts)
            fig.savefig("similarity_hist_{}.png".format(fp_type))
            # Release the figure so one is not leaked per fingerprint type.
            plt.close(fig)

        #Retrieve similarity distribution data
        values = np.vstack([bins[1:], counts]).T
        np.savetxt("counts_{}.txt".format(fp_type), values, fmt=['%f', '%d'])

        if fp_type == "circular":
            mref = ml.molec_properties(molecule_query, fp_type=fp_type)

            #Clean results: keep only the slots that were filled, selecting
            #the same positions in all three lists so they stay aligned
            filled = [k for k, value in enumerate(fp) if value is not None]

            #Add reference molecule in first position
            fp = [mref.fp] + [fp[k] for k in filled]
            similarity = [1] + [similarity[k] for k in filled]
            svg = [mref.to_svg()] + [svg[k] for k in filled]

            #Reduce Dimension
            X = np.asarray([fp2arr(f) for f in fp])
            embedding = dm.ReduceDimension(X, 2).run(dim_type)

            #Plot Reduce Dimension graph
            if not test:
                pt.interactive_map(embedding[:, 0],
                                   embedding[:, 1],
                                   svg,
                                   color=np.array(similarity))