Ejemplo n.º 1
0
def test_scheme_11():
    """Scheme 11 -- Rule 6: remove rings of sizes 3, 5, and 6 first.

    Rings of sizes 3, 5, and 6 are more frequent and synthetically more
    accessible than rings of other sizes, and most commercially available
    building blocks contain them. Rings of a different size are likely to have
    been built up intentionally for a dedicated purpose and are often retained
    throughout a whole series of bioactive compounds; penicillin, diazepam, and
    imipramine, together with their "me-too" analogues, are good examples.

    Each case checks that the expected fragment is generated and that the
    rejected fragment is not.
    """
    # (parent SMILES, fragment that must appear, fragment that must not appear)
    cases = (
        # 11a
        ('C1C2N(C1=O)CCS2', 'O=C1CCN1', 'C1CSCN1'),
        # 11b - Epinastine
        ('NC1=NCC2N1C1=CC=CC=C1CC1=CC=CC=C21', 'C1=CCNC=CC1', 'C1=NCCN1'),
    )

    for parent, keep, drop in cases:
        expected = canon(keep)
        rejected = canon(drop)
        fragment_smiles = [
            Chem.MolToSmiles(fragment)
            for fragment in tree_frags_from_mol(Chem.MolFromSmiles(parent))
        ]
        assert expected in fragment_smiles
        assert rejected not in fragment_smiles
Ejemplo n.º 2
0
def test_scheme_19():
    """Scheme 19 -- whole result, illustrated for Baccatin III.

    The expected hierarchy is produced by applying, in order:
    Murcko -> Rule 3 -> Rule 4 -> Rule 4 -> Rule 6.

    The test checks that every expected scaffold is generated and that exactly
    five fragments are returned.
    """
    # Baccatin III
    baccatin = 'CC1=C2C(C(=O)C3(C(CC4C(C3C(C(C2(C)C)(CC1O)O)OC(=O)C5=CC=CC=C5)(CO4)OC(=O)C)O)C)OC(=O)C'

    # One scaffold per hierarchy level, annotated with the rule that yields it.
    expected = [
        canon(smiles)
        for smiles in (
            'O=C(OC1C2CCC=C(CC(=O)C3CCC4OCC4C31)C2)c1ccccc1',  # Murcko
            'O=C1CC2=CCCC(C2)CC2C1CCC1OCC12',  # Rule 3
            'O=C1CC2=CCCC(C2)CC2CCCCC12',  # Rule 4
            'O=C1CCCC2CCC=C(C1)C2',  # Rule 4
            'O=C1CCCCCCC1',  # Rule 6
        )
    ]

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(baccatin))
    ]

    missing = [smiles for smiles in expected if smiles not in fragment_smiles]
    assert not missing

    assert len(fragment_smiles) == 5
Ejemplo n.º 3
0
def test_scheme_9():
    """Scheme 9 -- Rule 4 (see scheme 7).

    Checks that the expected spiro scaffold is among the fragments generated
    for rhynchophylline.
    """
    # Rhynchophylline
    rhynchophylline = 'CC[C@H]1CN2CC[C@]3(C(=O)Nc4ccccc43)[C@@H]2C[C@@H]1/C(=C\\OC)C(=O)OC'
    expected = canon('O=C1NC=CC12CCNC2')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(rhynchophylline))
    ]

    assert expected in fragment_smiles
Ejemplo n.º 4
0
def test_scheme_16():
    """Scheme 16 -- Rule 12: remove rings first where the linker is attached to
    a ring heteroatom at either end of the linker.

    Ring heteroatoms are easier to functionalise and are therefore often
    functionalised at a late stage of a chemical library synthesis, which makes
    them less characteristic for a chemical scaffold.

    Checks that the expected scaffold is among the fragments generated for
    deferasirox.
    """
    # Deferasirox
    deferasirox = 'O=C1C=CC=C/C1=C1\\N/C(=C2/C=CC=CC2=O)N(c2ccc(C(=O)O)cc2)N1'
    expected = canon('O=C1C=CC=CC1=C1NNC(=C2C=CC=CC2=O)N1')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(deferasirox))
    ]

    assert expected in fragment_smiles
Ejemplo n.º 5
0
def test_scheme_8():
    """Scheme 8 -- Rule 4 (see scheme 7).

    Checks that the expected scaffold is generated for sophocarpine and that
    neither of the two alternative scaffolds is.
    """
    # Sophocarpine
    sophocarpine = 'C1CC2CN3C(CC=CC3=O)C4C2N(C1)CCC4'
    expected = canon('C1CC2CNCC3CCCN(C1)C23')
    rejected = (
        canon('O=C1C=CCC2C3CCCNC3CCN12'),
        canon('O=C1C=CCC2CC3NCCCC3CN12'),
    )

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(sophocarpine))
    ]

    assert expected in fragment_smiles
    for unwanted in rejected:
        assert unwanted not in fragment_smiles
Ejemplo n.º 6
0
def test_scheme_5():
    """Scheme 5 -- Rule 2: do not remove rings with >= 12 atoms if there are
    still smaller rings to remove.

    A macrocycle is considered the most characteristic ring system of a
    molecule and should therefore be retained. This matters especially for
    cyclic peptides, which may carry bicyclic indole side chains from
    tryptophan that the more general rules would otherwise favour.

    Checks that the last fragment returned for seglitide is the macrocycle.
    """
    # Seglitide
    seglitide = 'CC(C)C1NC(=O)C(CCCCN)NC(=O)C(Cc2c[nH]c3ccccc23)NC(=O)C(Cc2ccc(O)cc2)NC(=O)C(C)N(C)C(=O)C(Cc2ccccc2)NC1=O'
    macrocycle = canon('O=C1CNC(=O)CNC(=O)CNC(=O)CNC(=O)CNC(=O)CN1')

    fragments = tree_frags_from_mol(Chem.MolFromSmiles(seglitide))
    final_smiles = Chem.MolToSmiles(fragments[-1])

    assert final_smiles == macrocycle
Ejemplo n.º 7
0
def test_scheme_15():
    """Scheme 15 -- Rule 11: for mixed aromatic/non-aromatic ring systems,
    retain non-aromatic rings with priority.

    Aromatic systems are extremely frequent, and benzene is the most frequent
    ring in practically all data sets. The rule exists to avoid linking too
    many compounds to benzene as the parent scaffold.

    Checks that, for sertraline, the non-aromatic ring is generated and
    benzene is not.
    """
    # Sertraline
    sertraline = 'CN[C@H]1CC[C@@H](C2=CC(Cl)=C(Cl)C=C2)C2=CC=CC=C12'
    expected = canon('C1=CCCCC1')
    rejected = canon('c1ccccc1')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(sertraline))
    ]

    assert expected in fragment_smiles
    assert rejected not in fragment_smiles
Ejemplo n.º 8
0
def test_scheme_14():
    """Scheme 14 -- Rule 9: if the number of heteroatoms is equal, the priority
    of heteroatoms to retain is N > O > S.

    The rule is motivated by the important role N heterocycles play in
    medicinal chemistry. Sulfur has the lowest priority because it is not able
    to undergo H-bonding.

    Checks that, for ticlopidine, the nitrogen-containing ring is generated
    and the thiophene ring is not.
    """
    # Ticlopidine
    ticlopidine = 'ClC1=CC=CC=C1CN1CCC2=C(C1)C=CS2'
    expected = canon('C1=CCNCC1')  # rdkit canonical
    rejected = canon('c1ccsc1')  # rdkit canonical

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(ticlopidine))
    ]

    assert expected in fragment_smiles
    assert rejected not in fragment_smiles
Ejemplo n.º 9
0
def test_scheme_13():
    """Scheme 13 -- Rule 8: remove rings with the least number of heteroatoms
    first.

    Exocyclic double-bonded heteroatoms (for example, exocyclic carbonyl
    groups) are not counted as heterocyclic atoms. In the indole ring, for
    example, the pyrrole ring is retained instead of the benzene ring.

    Checks that the five-membered nitrogen ring is generated and benzene is
    not.
    """
    # Indole
    indole = 'C1=C2C(=CC=C1)[N]C=C2'
    expected = canon('C1=C[N]C=C1')
    rejected = canon('c1ccccc1')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(indole))
    ]

    assert expected in fragment_smiles
    assert rejected not in fragment_smiles
Ejemplo n.º 10
0
def test_scheme_12():
    """Scheme 12 -- Rule 7: a fully aromatic ring system must not be dissected
    in a way that the resulting system is not aromatic any more.

    Converting aromatic rings into non-aromatic rings is chemically
    non-intuitive and would also affect the geometry of the ring atoms.

    Checks that the expected aromatic five-membered ring is among the
    fragments generated for zaleplon.
    """
    # Zaleplon
    zaleplon = 'O=C(C)N(CC)C1=CC=CC(C2=CC=NC3=C(C=NN23)C#N)=C1'
    expected = canon('c1cn[nH]c1')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(zaleplon))
    ]

    assert expected in fragment_smiles
Ejemplo n.º 11
0
def test_scheme_10():
    """Scheme 10 -- Rule 5: bridged ring systems are retained with preference
    over spiro ring systems.

    Under certain circumstances a ring system containing both ring fusions and
    bridged rings can be dissected to give either a spiro ring or a bridged
    ring, and both solutions have the same |Δ| value. When the two remaining
    sub-scaffolds tie on |Δ|, the ring system with a positive signed value of
    Δ is the one to retain.

    Checks that the expected bridged scaffold is among the fragments generated
    for cafestol.
    """
    # Cafestol
    cafestol = 'OC[C@@]5(O)C[C@@]31C[C@@H]5CC[C@H]1[C@]4(C)CCc2occc2[C@H]4CC3'
    expected = canon('C1CC2CCC(C1)C2')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(cafestol))
    ]

    assert expected in fragment_smiles
Ejemplo n.º 12
0
def test_scheme_6():
    """Scheme 6 -- Rule 3: choose the parent scaffold having the smallest
    number of acyclic linker bonds.

    This removes linked rings before fused rings, and rings linked by longer
    chains first. Linkers are usually the most likely point of a
    retrosynthetic disconnection: in combinatorial library synthesis the
    variable side chains are often attached to a cyclic core through a linking
    reaction that creates an acyclic linker. Pruning cyclic side chains early
    preserves the common core of the library, so dissecting at acyclic linkers
    is intuitive. It also preferentially retains more rigid scaffolds, which
    are more likely to have a unique interaction pattern.

    Checks that the fragment at index 1 for the flucloxacillin Murcko scaffold
    is the expected parent scaffold.
    """
    # Flucloxacillin murcko scaffold
    murcko_scaffold = 'O=C(NC1C(=O)N2CCSC12)c1conc1-c1ccccc1'
    expected_parent = canon('O=C(NC1C(=O)N2CCSC12)c1cnoc1')

    fragments = tree_frags_from_mol(Chem.MolFromSmiles(murcko_scaffold))
    parent_smiles = Chem.MolToSmiles(fragments[1])

    assert parent_smiles == expected_parent
Ejemplo n.º 13
0
def test_scheme_4():
    """Scheme 4 -- Rule 1: remove heterocycles of size 3 first.

    As an exception to the general rules, the fusion bond connecting the
    three-membered ring to other rings is converted into a double bond. The
    rule is aimed at epoxides and aziridines and treats them as functional
    groups that are removed beforehand rather than as rings. This reflects
    that epoxides are usually generated by oxidising a double bond, and that
    many natural products exist both with and without epoxidised double bonds.

    Checks that the fragment at index 1 for epothilone A has two rings and
    that the epothilone C scaffold is among the generated fragments.
    """
    # Epothilone A
    epothilone_a = 'CC1CCCC2C(O2)CC(OC(=O)CC(C(C(=O)C(C1O)C)(C)C)O)C(=CC3=CSC(=N3)C)C'
    # Epothilone C
    epothilone_c = canon('O=C1CCCCCCC=CCC(C=Cc2cscn2)OC(=O)CCC1')

    fragments = tree_frags_from_mol(Chem.MolFromSmiles(epothilone_a))
    ring_count = fragments[1].GetRingInfo().NumRings()
    assert ring_count == 2

    fragment_smiles = [Chem.MolToSmiles(fragment) for fragment in fragments]
    assert epothilone_c in fragment_smiles
Ejemplo n.º 14
0
def test_scheme_18():
    """Scheme 18 -- whole result for four diazepinenones.

    The process is illustrated on diazepam, bromazepam, zolazepam, and
    clotiazepam, one of the best known classes of anxiolytics.

    The molecular frameworks of the four drugs differ even though they are
    usually regarded as one compound class. In all four cases the linked ring
    is removed first (rule 3), which already groups diazepam and bromazepam
    into the same scaffold class while the other two remain distinct. After
    the five- or six-membered aromatic ring attached to the diazepinenone ring
    system is removed (rule 6), the remaining seven-membered diazepinenone
    ring is the same for all four molecules.

    Checks that every expected scaffold is generated and that the pooled
    fragments contain exactly eight distinct SMILES.
    """
    parents = (
        'CN1C(=O)CN=C(C2=C1C=CC(=C2)Cl)C3=CC=CC=C3',  # Diazepam
        'C1C(=O)NC2=C(C=C(C=C2)Br)C(=N1)C3=CC=CC=N3',  # Bromazepam
        'CC1=NN(C2=C1C(=NCC(=O)N2C)C3=CC=CC=C3F)C',  # Zolazepam
        'CCC1=CC2=C(S1)N(C(=O)CN=C2C3=CC=CC=C3Cl)C',  # Clotiazepam
    )

    expected = [
        canon(smiles)
        for smiles in (  # MOLECULES              |  # HIERARCHY
            'O=C1CN=C(c2ccccc2)c2ccccc2N1',  # Diazepam (murcko)      |  (3)
            'O=C1CN=Cc2ccccc2N1',  # Diazepam + Bromazepam  |  (2)
            'O=C1CN=C(c2ccccn2)c2ccccc2N1',  # Bromazepam (murcko)    |  (3)
            'O=C1CN=C(c2ccccc2)c2cn[nH]c2N1',  # Zolazepam  (murcko)    |  (3)
            'O=C1CN=Cc2cn[nH]c2N1',  # Zolazepam              |  (2)
            'O=C1CN=C(c2ccccc2)c2ccsc2N1',  # Clotiazepam (murcko)   |  (3)
            'O=C1CN=Cc2ccsc2N1',  # Clotiazepam            |  (2)
            'O=C1CN=CC=CN1',  # ALL                    |  (1)
            # < TOTAL: 8 > #
        )
    ]

    molecules = [Chem.MolFromSmiles(smiles) for smiles in parents]
    per_molecule = [tree_frags_from_mol(molecule) for molecule in molecules]
    # Pool the fragments of all four drugs and de-duplicate by SMILES.
    unique_smiles = {
        Chem.MolToSmiles(fragment)
        for fragment in chain.from_iterable(per_molecule)
    }

    missing = [smiles for smiles in expected if smiles not in unique_smiles]
    assert not missing

    assert len(unique_smiles) == 8
Ejemplo n.º 15
0
def test_scheme_7():
    """Scheme 7 -- Rule 4: retain bridged rings, spiro rings, and nonlinear
    ring fusion patterns with preference.

    These patterns are unusual structural features that occur less often than
    normally fused rings, and their non-planar, characteristic shapes set them
    apart from the majority of more planar organic molecules. Most ring
    systems are linearly fused, with no atom shared by more than two rings
    (steroids, for example); there the number of bonds belonging to more than
    one ring, nrrb, equals the number of rings nR - 1. Bridges and nonlinear
    fusions raise nrrb, whereas spiro connections lower it because a spiro
    junction shares no bond between the two rings. The ring removed with
    preference is therefore the one whose remaining scaffold has the highest
    |Δ| = |nrrb - (nR - 1)|.

    Checks that, for pentazocine, the bridged scaffold is generated and the
    linearly fused alternative is not.
    """
    # Pentazocine
    pentazocine = 'C[C@H]1[C@H]2Cc3ccc(O)cc3[C@]1(C)CCN2CC=C(C)C'
    expected = canon('C1=CC2CCNC(C1)C2')
    rejected = canon('c1ccc2c(c1)CCCC2')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(pentazocine))
    ]

    assert expected in fragment_smiles
    assert rejected not in fragment_smiles
Ejemplo n.º 16
0
def test_scheme_17():
    """Scheme 17 -- Rule 13: tie-breaking rule.

    Remaining ties are resolved by choosing, among the possible remaining
    sub-scaffolds, the one whose rdkit canonical SMILES ranks lower in
    alphabetical order. The rule itself is arbitrary, but using it does not
    make the overall class assignment completely arbitrary.

    The result differs from the original publication because rdkit's
    canonicalisation algorithm differs from the MolInspiration algorithm used
    there.

    Checks that the alphabetically lower of the two level-3 candidates and the
    expected level-2 scaffold are both generated for ormeloxifene.
    """
    # Ormeloxifene
    ormeloxifene = 'CC1([C@@H]([C@H](c2ccc(cc2O1)OC)c3ccc(cc3)OCCN4CCCC4)c5ccccc5)C'
    candidate_a = canon('c1ccc(C2COc3ccccc3C2)cc1')
    candidate_b = canon('c1ccc(C2CCOc3ccccc32)cc1')
    # The tie-break winner is the alphabetically smaller canonical SMILES.
    level_3_expected = min(candidate_a, candidate_b)
    level_2_expected = canon('c1ccc2c(c1)CCCO2')

    fragment_smiles = [
        Chem.MolToSmiles(fragment)
        for fragment in tree_frags_from_mol(Chem.MolFromSmiles(ormeloxifene))
    ]

    assert level_3_expected in fragment_smiles
    assert level_2_expected in fragment_smiles