예제 #1
0
def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """Check the penalty applied when only the carbon count is off by one.

    The scorers target C12H24, while every test molecule has the formula
    C11H24O: the total number of atoms (36) and the hydrogen count agree
    with the target, and the carbon count differs by one. The expectation
    is verified for both the arithmetic and the geometric mean variants.
    """
    target_formula = 'C12H24'
    arithmetic_scorer = IsomerScoringFunction(target_formula,
                                              mean_function='arithmetic')
    geometric_scorer = IsomerScoringFunction(target_formula,
                                             mean_function='geometric')

    # Every SMILES below corresponds to the formula C11H24O.
    molecules = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # Only the carbon term is penalized: it is the Gaussian modifier
    # evaluated at a deviation of 1.0. The other two terms are perfect.
    carbon_term = GaussianModifier(mu=0, sigma=1)(1.0)
    total_atoms_term = 1.0
    hydrogen_term = 1.0
    arithmetic_expected = (total_atoms_term + carbon_term + hydrogen_term) / 3.0
    geometric_expected = (total_atoms_term * carbon_term * hydrogen_term)**(1 / 3)

    cases = (
        (arithmetic_scorer, arithmetic_expected),
        (geometric_scorer, geometric_expected),
    )
    for scorer, expected in cases:
        for molecule in molecules:
            assert scorer.score(molecule) == pytest.approx(expected)
예제 #2
0
def test_isomer_scoring_function_returns_one_for_correct_molecule():
    """Molecules that match the target formula exactly must score 1.0."""
    scorer = IsomerScoringFunction('C11H24')

    # Three different structures, all with the formula C11H24.
    matching_isomers = (
        'CCCCCCCCCCC',
        'CC(CCC)CCCCCC',
        'CCCCC(CC(C)CC)C',
    )

    for molecule in matching_isomers:
        assert scorer.score(molecule) == 1.0
예제 #3
0
def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """Check the score when only the carbon count deviates from the target.

    The scorer targets C12H24; the molecules under test are all C11H24O, so
    the carbon count is off by one. The expected score removes one third of
    the corresponding Gaussian penalty from a perfect score of 1.0.
    """
    scorer = IsomerScoringFunction('C12H24')

    # Every SMILES below corresponds to the formula C11H24O.
    molecules = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # Penalty for a deviation of 1.0 in the number of C atoms, as given by
    # the Gaussian modifier; it contributes one third of the overall score.
    carbon_penalty = 1.0 - GaussianModifier(mu=0, sigma=1)(1.0)
    expected = 1.0 - carbon_penalty / 3.0

    for molecule in molecules:
        assert scorer.score(molecule) == pytest.approx(expected)
예제 #4
0
def sitagliptin_replacement() -> GoalDirectedBenchmark:
    """Build the "Sitagliptin MPO" benchmark.

    The goal is a molecule dissimilar to sitagliptin, but with the same
    properties. The objective is the geometric mean of four scores:

    * ECFP4 Tanimoto similarity to sitagliptin, passed through a Gaussian
      modifier centred at 0 (sigma 0.1);
    * logP, through a Gaussian centred at sitagliptin's logP (sigma 0.2);
    * TPSA, through a Gaussian centred at sitagliptin's TPSA (sigma 5);
    * an isomer score for the formula C16H15F6N5O.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    sitagliptin_smiles = "Fc1cc(c(F)cc1F)CC(N)CC(=O)N3Cc2nnc(n2CC3)C(F)(F)F"
    reference_mol = Chem.MolFromSmiles(sitagliptin_smiles)

    # Property targets are read off the reference molecule itself.
    reference_logp = logP(reference_mol)
    reference_tpsa = tpsa(reference_mol)

    dissimilarity_modifier = GaussianModifier(mu=0, sigma=0.1)
    dissimilarity_score = TanimotoScoringFunction(
        sitagliptin_smiles,
        fp_type="ECFP4",
        score_modifier=dissimilarity_modifier,
    )

    logp_modifier = GaussianModifier(mu=reference_logp, sigma=0.2)
    logp_score = RdkitScoringFunction(descriptor=logP,
                                      score_modifier=logp_modifier)

    tpsa_modifier = GaussianModifier(mu=reference_tpsa, sigma=5)
    tpsa_score = RdkitScoringFunction(descriptor=tpsa,
                                      score_modifier=tpsa_modifier)

    formula_score = IsomerScoringFunction("C16H15F6N5O")

    contribution_spec = uniform_specification(1, 10, 100)

    combined_objective = GeometricMeanScoringFunction(
        [dissimilarity_score, logp_score, tpsa_score, formula_score])

    return GoalDirectedBenchmark(
        name="Sitagliptin MPO",
        objective=combined_objective,
        contribution_specification=contribution_spec,
    )
예제 #5
0
def isomers_c7h8n2o2() -> GoalDirectedBenchmark:
    """
    Benchmark to try and get 100 isomers for C7H8N2O2.

    Returns:
        A GoalDirectedBenchmark named after the formula, scored by an
        IsomerScoringFunction for that same formula.
    """
    formula = 'C7H8N2O2'

    contribution_spec = uniform_specification(100)
    isomer_objective = IsomerScoringFunction(formula)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contribution_spec,
    )
예제 #6
0
def zaleplon_with_other_formula() -> GoalDirectedBenchmark:
    """Build the 'Zaleplon MPO' benchmark.

    The objective is the geometric mean of two scores: the ECFP4 Tanimoto
    similarity to the zaleplon reference SMILES, and an isomer score for
    the molecular formula C19H17N3O2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    reference_smiles = 'O=C(C)N(CC)C1=CC=CC(C2=CC=NC3=C(C=NN23)C#N)=C1'

    similarity_score = TanimotoScoringFunction(reference_smiles, fp_type='ECFP4')
    formula_score = IsomerScoringFunction('C19H17N3O2')

    contribution_spec = uniform_specification(1, 10, 100)

    combined_objective = GeometricMeanScoringFunction(
        [similarity_score, formula_score])

    return GoalDirectedBenchmark(
        name='Zaleplon MPO',
        objective=combined_objective,
        contribution_specification=contribution_spec,
    )
예제 #7
0
def isomers_c9h10n2o2pf2cl() -> GoalDirectedBenchmark:
    """
    Benchmark to try and get 100 isomers for C9H10N2O2PF2Cl.

    Returns:
        A GoalDirectedBenchmark named after the formula, scored by an
        IsomerScoringFunction for that same formula.
    """
    formula = 'C9H10N2O2PF2Cl'

    contribution_spec = uniform_specification(100)
    isomer_objective = IsomerScoringFunction(formula)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contribution_spec,
    )
예제 #8
0
def isomers_c11h24() -> GoalDirectedBenchmark:
    """
    Benchmark to try and get all C11H24 molecules there are.
    There should be 159 if one ignores stereochemistry.

    Returns:
        A GoalDirectedBenchmark named after the formula, scored by an
        IsomerScoringFunction for that same formula.
    """
    formula = 'C11H24'

    # 159 is the number of isomers quoted above.
    contribution_spec = uniform_specification(159)
    isomer_objective = IsomerScoringFunction(formula)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contribution_spec,
    )
예제 #9
0
def isomers_c9h10n2o2pf2cl(mean_function='geometric', n_samples=250) -> GoalDirectedBenchmark:
    """
    Benchmark to try and get isomers for C9H10N2O2PF2Cl.

    Args:
        mean_function: 'arithmetic' or 'geometric'
        n_samples: value handed to uniform_specification (250 by default)

    Returns:
        A GoalDirectedBenchmark named after the formula, scored by an
        IsomerScoringFunction for that same formula.
    """
    formula = 'C9H10N2O2PF2Cl'

    contribution_spec = uniform_specification(n_samples)
    isomer_objective = IsomerScoringFunction(formula,
                                             mean_function=mean_function)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contribution_spec,
    )
예제 #10
0
def isomers_c7h8n2o2(mean_function='geometric') -> GoalDirectedBenchmark:
    """
    Benchmark to try and get 100 isomers for C7H8N2O2.

    Args:
        mean_function: 'arithmetic' or 'geometric'

    Returns:
        A GoalDirectedBenchmark named after the formula, scored by an
        IsomerScoringFunction for that same formula.
    """
    formula = 'C7H8N2O2'

    contribution_spec = uniform_specification(100)
    isomer_objective = IsomerScoringFunction(formula,
                                             mean_function=mean_function)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contribution_spec,
    )
예제 #11
0
def isomers_c11h24(mean_function='geometric') -> GoalDirectedBenchmark:
    """
    Benchmark to try and get all C11H24 molecules there are.
    There should be 159 if one ignores stereochemistry.

    Args:
        mean_function: 'arithmetic' or 'geometric'

    Returns:
        A GoalDirectedBenchmark named after the formula, scored by an
        IsomerScoringFunction for that same formula.
    """
    formula = 'C11H24'

    # 159 is the number of isomers quoted above.
    contribution_spec = uniform_specification(159)
    isomer_objective = IsomerScoringFunction(formula,
                                             mean_function=mean_function)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contribution_spec,
    )
예제 #12
0
def test_isomer_scoring_function_returns_one_for_correct_molecule():
    """Exact formula matches must score 1.0 with either mean function."""
    target_formula = 'C11H24'
    arithmetic_scorer = IsomerScoringFunction(target_formula,
                                              mean_function='arithmetic')
    geometric_scorer = IsomerScoringFunction(target_formula,
                                             mean_function='geometric')

    # Three different structures, all with the formula C11H24.
    matching_isomers = (
        'CCCCCCCCCCC',
        'CC(CCC)CCCCCC',
        'CCCCC(CC(C)CC)C',
    )

    for scorer in (arithmetic_scorer, geometric_scorer):
        for molecule in matching_isomers:
            assert scorer.score(molecule) == 1.0
예제 #13
0
def test_isomer_scoring_function_uses_geometric_mean_by_default():
    """With no mean_function argument, the scorer must use geometric_mean."""
    default_scorer = IsomerScoringFunction('C2H4')
    selected_mean = default_scorer.mean_function
    assert selected_mean == geometric_mean
예제 #14
0
def test_isomer_scoring_function_invalid_molecule():
    """Scoring the string 'CCCinvalid' must yield the scorer's corrupt_score."""
    scorer = IsomerScoringFunction('C60')

    observed = scorer.score('CCCinvalid')

    assert observed == scorer.corrupt_score