コード例 #1
0
def sitagliptin_replacement() -> GoalDirectedBenchmark:
    """
    Build the "Sitagliptin MPO" benchmark.

    The goal is a molecule that is dissimilar to sitagliptin but shares its
    properties. The objective is the geometric mean of four terms:

    * an ECFP4 Tanimoto term whose Gaussian modifier is centred on 0,
    * a logP term centred on the logP computed for sitagliptin,
    * a TPSA term centred on the TPSA computed for sitagliptin,
    * an isomer term for the formula C16H15F6N5O.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    sitagliptin_smiles = "Fc1cc(c(F)cc1F)CC(N)CC(=O)N3Cc2nnc(n2CC3)C(F)(F)F"
    reference_mol = Chem.MolFromSmiles(sitagliptin_smiles)

    # Property targets are read off the reference molecule itself.
    reference_logp = logP(reference_mol)
    reference_tpsa = tpsa(reference_mol)

    # Similarity is pushed towards 0 (mu=0), i.e. away from the reference.
    dissimilarity_modifier = GaussianModifier(mu=0, sigma=0.1)
    dissimilarity_term = TanimotoScoringFunction(
        sitagliptin_smiles,
        fp_type="ECFP4",
        score_modifier=dissimilarity_modifier,
    )

    logp_term = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=GaussianModifier(mu=reference_logp, sigma=0.2),
    )
    tpsa_term = RdkitScoringFunction(
        descriptor=tpsa,
        score_modifier=GaussianModifier(mu=reference_tpsa, sigma=5),
    )
    formula_term = IsomerScoringFunction("C16H15F6N5O")

    contribution_spec = uniform_specification(1, 10, 100)
    components = [dissimilarity_term, logp_term, tpsa_term, formula_term]

    return GoalDirectedBenchmark(
        name="Sitagliptin MPO",
        objective=GeometricMeanScoringFunction(components),
        contribution_specification=contribution_spec,
    )
コード例 #2
0
def smarts_with_other_target(smarts: str, other_molecule: str) -> ScoringFunction:
    """
    Combine a SMARTS term with property terms taken from another molecule.

    Args:
        smarts: pattern handed to SMARTSScoringFunction as its target.
        other_molecule: SMILES of the molecule whose logP, TPSA and Bertz
            values become the centres of the Gaussian modifiers.

    Returns:
        The geometric mean of the SMARTS term and the three property terms,
        in the order SMARTS, logP, TPSA, Bertz.
    """
    smarts_term = SMARTSScoringFunction(target=smarts)
    reference = Chem.MolFromSmiles(other_molecule)

    # (descriptor, sigma) for every property matched against the reference.
    descriptor_sigmas = ((logP, 0.2), (tpsa, 5), (bertz, 30))

    # All targets are computed before any scoring function is constructed.
    targets = [descriptor(reference) for descriptor, _ in descriptor_sigmas]

    property_terms = [
        RdkitScoringFunction(descriptor=descriptor,
                             score_modifier=GaussianModifier(mu=target, sigma=sigma))
        for (descriptor, sigma), target in zip(descriptor_sigmas, targets)
    ]

    return GeometricMeanScoringFunction([smarts_term, *property_terms])
コード例 #3
0
def weird_physchem() -> GoalDirectedBenchmark:
    """
    Build the 'Physchem MPO' benchmark.

    The objective is the arithmetic mean of four descriptor terms:

    * Bertz index with MaxGaussianModifier(mu=1500, sigma=200),
    * molecular weight with MinGaussianModifier(mu=400, sigma=40),
    * number of aromatic rings with MinGaussianModifier(mu=3, sigma=1),
    * fluorine atom count with GaussianModifier(mu=6, sigma=1.0).

    Returns:
        The configured GoalDirectedBenchmark.
    """
    bertz_term = RdkitScoringFunction(
        descriptor=bertz,
        score_modifier=MaxGaussianModifier(mu=1500, sigma=200),
    )
    weight_term = RdkitScoringFunction(
        descriptor=mol_weight,
        score_modifier=MinGaussianModifier(mu=400, sigma=40),
    )
    aromatic_ring_term = RdkitScoringFunction(
        descriptor=num_aromatic_rings,
        score_modifier=MinGaussianModifier(mu=3, sigma=1),
    )
    fluorine_term = RdkitScoringFunction(
        descriptor=AtomCounter('F'),
        score_modifier=GaussianModifier(mu=6, sigma=1.0),
    )

    components = [bertz_term, weight_term, aromatic_ring_term, fluorine_term]
    objective = ArithmeticMeanScoringFunction(components)

    contribution_spec = uniform_specification(1, 10, 100)

    return GoalDirectedBenchmark(
        name='Physchem MPO',
        objective=objective,
        contribution_specification=contribution_spec,
    )
コード例 #4
0
def start_pop_ranolazine() -> GoalDirectedBenchmark:
    """
    Build the 'Ranolazine MPO' benchmark seeded with ranolazine itself.

    The objective is the arithmetic mean of, in this order: an atom-pair
    ('AP') Tanimoto term against ranolazine with a clipped score modifier,
    a logP term, a fluorine-count term and an aromatic-ring-count term.
    Ranolazine is passed as the only member of the starting population.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    ranolazine_smiles = 'COc1ccccc1OCC(O)CN2CCN(CC(=O)Nc3c(C)cccc3C)CC2'

    similarity_term = TanimotoScoringFunction(
        ranolazine_smiles,
        fp_type='AP',
        score_modifier=ClippedScoreModifier(upper_x=0.7),
    )

    # NOTE(review): this term was previously held in a local called
    # `logP_under_4`, yet the modifier is a MaxGaussianModifier with mu=7.
    # Confirm which of the two reflects the intended threshold.
    logp_term = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=MaxGaussianModifier(mu=7, sigma=1),
    )

    aromatic_ring_term = RdkitScoringFunction(
        descriptor=num_aromatic_rings,
        score_modifier=MinGaussianModifier(mu=1, sigma=1),
    )

    fluorine_term = RdkitScoringFunction(
        descriptor=AtomCounter('F'),
        score_modifier=GaussianModifier(mu=1, sigma=1.0),
    )

    # Component order is kept as: similarity, logP, fluorine, aromatic rings.
    components = [similarity_term, logp_term, fluorine_term, aromatic_ring_term]
    objective = ArithmeticMeanScoringFunction(components)

    contribution_spec = uniform_specification(1, 10, 100)

    return GoalDirectedBenchmark(
        name='Ranolazine MPO',
        objective=objective,
        contribution_specification=contribution_spec,
        starting_population=[ranolazine_smiles],
    )
コード例 #5
0
def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """
    Isomer scorers for C12H24 must penalise C11H24O molecules.

    Both the arithmetic-mean and the geometric-mean variant are checked. The
    expected score combines three components: the total-atom and hydrogen
    components are taken as 1.0, and only the carbon component is reduced.
    """
    target_formula = 'C12H24'
    arithmetic_scorer = IsomerScoringFunction(target_formula,
                                              mean_function='arithmetic')
    geometric_scorer = IsomerScoringFunction(target_formula,
                                             mean_function='geometric')

    # Every candidate has the formula C11H24O, i.e. one carbon fewer than
    # the target formula.
    candidates = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # The carbon component is the Gaussian modifier evaluated at a
    # deviation of 1.0 in the number of C atoms.
    c_score = GaussianModifier(mu=0, sigma=1)(1.0)
    n_atoms_score = 1.0
    h_score = 1.0
    expected_arithmetic = (n_atoms_score + c_score + h_score) / 3.0
    expected_geometric = (n_atoms_score * c_score * h_score)**(1 / 3)

    # Arithmetic scorer is checked on all candidates first, then geometric.
    checks = (
        (arithmetic_scorer, expected_arithmetic),
        (geometric_scorer, expected_geometric),
    )
    for scorer, expected in checks:
        for candidate in candidates:
            assert scorer.score(candidate) == pytest.approx(expected)
コード例 #6
0
def ranolazine_mpo() -> GoalDirectedBenchmark:
    """
    Make start_pop_ranolazine more polar and add a fluorine.

    The objective is the geometric mean of, in this order: an atom-pair
    ("AP") Tanimoto term against ranolazine with a clipped score modifier,
    a logP term, a fluorine-count term and a TPSA term. Ranolazine is passed
    as the only member of the starting population.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    ranolazine_smiles = "COc1ccccc1OCC(O)CN2CCN(CC(=O)Nc3c(C)cccc3C)CC2"

    similarity_term = TanimotoScoringFunction(
        ranolazine_smiles,
        fp_type="AP",
        score_modifier=ClippedScoreModifier(upper_x=0.7),
    )

    # NOTE(review): this term was previously held in a local called
    # `logP_under_4`, yet the modifier is a MaxGaussianModifier with mu=7.
    # Confirm which of the two reflects the intended threshold.
    logp_term = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=MaxGaussianModifier(mu=7, sigma=1),
    )

    tpsa_term = RdkitScoringFunction(
        descriptor=tpsa,
        score_modifier=MaxGaussianModifier(mu=95, sigma=20),
    )

    fluorine_term = RdkitScoringFunction(
        descriptor=AtomCounter("F"),
        score_modifier=GaussianModifier(mu=1, sigma=1.0),
    )

    # Component order is kept as: similarity, logP, fluorine, TPSA.
    components = [similarity_term, logp_term, fluorine_term, tpsa_term]
    objective = GeometricMeanScoringFunction(components)

    contribution_spec = uniform_specification(1, 10, 100)

    return GoalDirectedBenchmark(
        name="Ranolazine MPO",
        objective=objective,
        contribution_specification=contribution_spec,
        starting_population=[ranolazine_smiles],
    )
コード例 #7
0
def pioglitazone_mpo() -> GoalDirectedBenchmark:
    """
    Build the 'Pioglitazone MPO' benchmark.

    The goal is pioglitazone with the same molecular weight but fewer
    rotatable bonds. The objective is the geometric mean of an ECFP4
    Tanimoto term (Gaussian modifier centred on 0), a molecular-weight term
    centred on the weight computed for pioglitazone, and a rotatable-bond
    term centred on 2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    pioglitazone_smiles = 'O=C1NC(=O)SC1Cc3ccc(OCCc2ncc(cc2)CC)cc3'
    reference_mol = Chem.MolFromSmiles(pioglitazone_smiles)
    reference_weight = mol_weight(reference_mol)

    tanimoto_term = TanimotoScoringFunction(
        pioglitazone_smiles,
        fp_type='ECFP4',
        score_modifier=GaussianModifier(mu=0, sigma=0.1),
    )
    weight_term = RdkitScoringFunction(
        descriptor=mol_weight,
        score_modifier=GaussianModifier(mu=reference_weight, sigma=10),
    )
    rotatable_bond_term = RdkitScoringFunction(
        descriptor=num_rotatable_bonds,
        score_modifier=GaussianModifier(mu=2, sigma=0.5),
    )

    contribution_spec = uniform_specification(1, 10, 100)
    components = [tanimoto_term, weight_term, rotatable_bond_term]

    return GoalDirectedBenchmark(
        name='Pioglitazone MPO',
        objective=GeometricMeanScoringFunction(components),
        contribution_specification=contribution_spec,
    )
コード例 #8
0
def test_gaussian_function():
    """
    GaussianModifier must agree with the reference `gaussian` helper.

    Checks the value at the centre, one scalar input and one array input.
    The scalar, the array and the reference helper come from the enclosing
    module.
    """
    center, width = -1.223, 0.334
    modifier = GaussianModifier(mu=center, sigma=width)

    # At the centre the modifier must return exactly 1.0.
    assert modifier(center) == 1.0

    # The scalar is compared exactly, the array element-wise within tolerance.
    assert modifier(scalar_value) == gaussian(scalar_value, center, width)
    assert np.allclose(modifier(value_array),
                       gaussian(value_array, center, width))
コード例 #9
0
def tpsa_benchmark(target: float) -> GoalDirectedBenchmark:
    """
    Build a benchmark whose objective is a TPSA term centred on `target`.

    Args:
        target: centre (mu) of the Gaussian modifier applied to the TPSA
            descriptor; sigma is fixed at 20.0.

    Returns:
        A GoalDirectedBenchmark whose name includes the target value.
    """
    tpsa_modifier = GaussianModifier(mu=target, sigma=20.0)
    tpsa_term = RdkitScoringFunction(descriptor=tpsa,
                                     score_modifier=tpsa_modifier)

    contribution_spec = uniform_specification(1, 10, 100)

    return GoalDirectedBenchmark(
        name=f'TPSA (target: {target})',
        objective=tpsa_term,
        contribution_specification=contribution_spec,
    )
コード例 #10
0
    def __init__(self, molecular_formula: str) -> None:
        """
        Set up one scoring function per element plus one for the atom total.

        Args:
            molecular_formula: target molecular formula
        """
        super().__init__()

        formula_counts = parse_molecular_formula(molecular_formula)

        # Target for the total-atoms term: sum of the per-element counts.
        expected_total = sum(count for _, count in formula_counts)

        # One term per element, each centred on that element's count.
        per_element_terms = []
        for symbol, count in formula_counts:
            count_modifier = GaussianModifier(mu=count, sigma=1.0)
            per_element_terms.append(
                RdkitScoringFunction(descriptor=AtomCounter(symbol),
                                     score_modifier=count_modifier))
        self.functions = per_element_terms

        # Last term: total number of atoms, with a wider sigma of 2.0.
        total_modifier = GaussianModifier(mu=expected_total, sigma=2.0)
        self.functions.append(
            RdkitScoringFunction(descriptor=num_atoms,
                                 score_modifier=total_modifier))
コード例 #11
0
def amlodipine_rings() -> GoalDirectedBenchmark:
    """
    Build the 'Amlodipine MPO' benchmark: amlodipine with 3 rings.

    The objective is the geometric mean of an ECFP4 Tanimoto term against
    amlodipine and a ring-count term centred on 3.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    # Raw string: the SMILES contains backslashes.
    amlodipine_smiles = r'Clc1ccccc1C2C(=C(/N/C(=C2/C(=O)OCC)COCCN)C)\C(=O)OC'

    similarity_term = TanimotoScoringFunction(amlodipine_smiles, fp_type='ECFP4')
    ring_count_term = RdkitScoringFunction(
        descriptor=num_rings,
        score_modifier=GaussianModifier(mu=3, sigma=0.5),
    )

    contribution_spec = uniform_specification(1, 10, 100)
    objective = GeometricMeanScoringFunction([similarity_term, ring_count_term])

    return GoalDirectedBenchmark(
        name='Amlodipine MPO',
        objective=objective,
        contribution_specification=contribution_spec,
    )
コード例 #12
0
def perindopril_rings() -> GoalDirectedBenchmark:
    """
    Build the 'Perindopril MPO' benchmark: perindopril with two aromatic rings.

    The objective is the geometric mean of an ECFP4 Tanimoto term against
    perindopril and an aromatic-ring-count term centred on 2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    perindopril_smiles = 'O=C(OCC)C(NC(C(=O)N1C(C(=O)O)CC2CCCCC12)C)CCC'

    similarity_term = TanimotoScoringFunction(perindopril_smiles,
                                              fp_type='ECFP4')
    aromatic_ring_term = RdkitScoringFunction(
        descriptor=num_aromatic_rings,
        score_modifier=GaussianModifier(mu=2, sigma=0.5),
    )

    contribution_spec = uniform_specification(1, 10, 100)
    objective = GeometricMeanScoringFunction(
        [similarity_term, aromatic_ring_term])

    return GoalDirectedBenchmark(
        name='Perindopril MPO',
        objective=objective,
        contribution_specification=contribution_spec,
    )
コード例 #13
0
    def determine_scoring_functions(
            molecular_formula: str) -> List[RdkitScoringFunction]:
        """
        Build the scoring functions for a molecular formula.

        Args:
            molecular_formula: target molecular formula

        Returns:
            One scoring function per element of the parsed formula, in the
            parsed order, followed by one for the total number of atoms.
        """
        formula_counts = parse_molecular_formula(molecular_formula)

        # Target for the total-atoms term: sum of the per-element counts.
        expected_total = sum(count for _, count in formula_counts)

        # One term per element, each centred on that element's count.
        scoring_functions = []
        for symbol, count in formula_counts:
            count_modifier = GaussianModifier(mu=count, sigma=1.0)
            scoring_functions.append(
                RdkitScoringFunction(descriptor=AtomCounter(symbol),
                                     score_modifier=count_modifier))

        # Last term: total number of atoms, with a wider sigma of 2.0.
        total_modifier = GaussianModifier(mu=expected_total, sigma=2.0)
        scoring_functions.append(
            RdkitScoringFunction(descriptor=num_atoms,
                                 score_modifier=total_modifier))

        return scoring_functions
コード例 #14
0
def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """
    An isomer scorer for C12H24 must penalise C11H24O molecules.

    The expected score is 1.0 minus one third of a single penalty, where the
    penalty is the shortfall of the Gaussian modifier at a deviation of 1.0.
    """
    scorer = IsomerScoringFunction('C12H24')

    # Every candidate has the formula C11H24O, i.e. one carbon fewer than
    # the target formula.
    candidates = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # The penalty corresponds to a deviation of 1.0 from the Gaussian
    # modifier in the number of C atoms.
    single_component_penalty = 1.0 - GaussianModifier(mu=0, sigma=1)(1.0)
    expected_score = 1.0 - single_component_penalty / 3.0

    for candidate in candidates:
        assert scorer.score(candidate) == pytest.approx(expected_score)