def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """
    Molecules with one carbon too few are penalized under both mean functions.

    The target formula is C12H24 while every candidate is C11H24O. The test
    expects the carbon sub-score to drop to the Gaussian value at a deviation
    of 1.0, and the hydrogen and total-atom sub-scores to remain at 1.0.
    """
    target_formula = 'C12H24'
    scorer_arithmetic = IsomerScoringFunction(target_formula, mean_function='arithmetic')
    scorer_geometric = IsomerScoringFunction(target_formula, mean_function='geometric')

    # every candidate below fits the formula C11H24O
    candidates = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # sub-scores: only the carbon count deviates (by 1.0) from the target
    total_atoms_part = 1.0
    carbon_part = GaussianModifier(mu=0, sigma=1)(1.0)
    hydrogen_part = 1.0

    arithmetic_expectation = (total_atoms_part + carbon_part + hydrogen_part) / 3.0
    geometric_expectation = (total_atoms_part * carbon_part * hydrogen_part)**(1 / 3)

    # arithmetic scorer is checked on all candidates first, then the geometric one
    checks = (
        (scorer_arithmetic, arithmetic_expectation),
        (scorer_geometric, geometric_expectation),
    )
    for scorer, expectation in checks:
        for candidate in candidates:
            assert scorer.score(candidate) == pytest.approx(expectation)
def test_isomer_scoring_function_returns_one_for_correct_molecule():
    """Isomers that match the target formula C11H24 must score exactly 1.0."""
    scorer = IsomerScoringFunction('C11H24')

    # every candidate below fits the formula C11H24
    matching_isomers = (
        'CCCCCCCCCCC',
        'CC(CCC)CCCCCC',
        'CCCCC(CC(C)CC)C',
    )

    for candidate in matching_isomers:
        assert scorer.score(candidate) == 1.0
def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """
    Molecules with one carbon too few are penalized by the isomer score.

    The target formula is C12H24 while every candidate is C11H24O. The
    expected score is 1.0 minus one third of the carbon-count penalty.
    """
    scorer = IsomerScoringFunction('C12H24')

    # every candidate below fits the formula C11H24O
    candidates = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # the penalty is the Gaussian modifier's shortfall at a deviation of 1.0
    # in the number of C atoms
    carbon_penalty = 1.0 - GaussianModifier(mu=0, sigma=1)(1.0)
    expectation = 1.0 - carbon_penalty / 3.0

    for candidate in candidates:
        assert scorer.score(candidate) == pytest.approx(expectation)
def sitagliptin_replacement() -> GoalDirectedBenchmark:
    """
    Build the "Sitagliptin MPO" benchmark.

    The goal is a molecule dissimilar to sitagliptin but with the same
    properties. The objective is the geometric mean of four scores:

    * ECFP4 Tanimoto similarity to sitagliptin, passed through a Gaussian
      modifier centred on 0 (sigma 0.1);
    * logP, Gaussian-centred on sitagliptin's own logP (sigma 0.2);
    * TPSA, Gaussian-centred on sitagliptin's own TPSA (sigma 5);
    * an isomer score for the formula C16H15F6N5O.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    reference_smiles = "Fc1cc(c(F)cc1F)CC(N)CC(=O)N3Cc2nnc(n2CC3)C(F)(F)F"
    reference_mol = Chem.MolFromSmiles(reference_smiles)

    # property targets are read off the reference molecule itself
    logp_goal = logP(reference_mol)
    tpsa_goal = tpsa(reference_mol)

    dissimilarity_modifier = GaussianModifier(mu=0, sigma=0.1)
    dissimilarity = TanimotoScoringFunction(reference_smiles,
                                            fp_type="ECFP4",
                                            score_modifier=dissimilarity_modifier)

    logp_match = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=GaussianModifier(mu=logp_goal, sigma=0.2))
    tpsa_match = RdkitScoringFunction(
        descriptor=tpsa,
        score_modifier=GaussianModifier(mu=tpsa_goal, sigma=5))

    formula_match = IsomerScoringFunction("C16H15F6N5O")

    contributions = uniform_specification(1, 10, 100)
    combined_objective = GeometricMeanScoringFunction(
        [dissimilarity, logp_match, tpsa_match, formula_match])

    return GoalDirectedBenchmark(
        name="Sitagliptin MPO",
        objective=combined_objective,
        contribution_specification=contributions,
    )
def isomers_c7h8n2o2() -> GoalDirectedBenchmark:
    """
    Build the benchmark that asks for 100 isomers of C7H8N2O2.

    Returns:
        A GoalDirectedBenchmark named 'C7H8N2O2' whose objective is the
        isomer score for that formula.
    """
    formula = 'C7H8N2O2'
    isomer_objective = IsomerScoringFunction(formula)
    contributions = uniform_specification(100)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contributions,
    )
def zaleplon_with_other_formula() -> GoalDirectedBenchmark:
    """
    Build the 'Zaleplon MPO' benchmark: zaleplon similarity with another formula.

    The objective is the geometric mean of the ECFP4 Tanimoto similarity to
    the zaleplon SMILES and the isomer score for the formula C19H17N3O2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    zaleplon_smiles = 'O=C(C)N(CC)C1=CC=CC(C2=CC=NC3=C(C=NN23)C#N)=C1'

    similarity_to_zaleplon = TanimotoScoringFunction(zaleplon_smiles, fp_type='ECFP4')
    other_formula = IsomerScoringFunction('C19H17N3O2')

    contributions = uniform_specification(1, 10, 100)
    combined_objective = GeometricMeanScoringFunction(
        [similarity_to_zaleplon, other_formula])

    return GoalDirectedBenchmark(
        name='Zaleplon MPO',
        objective=combined_objective,
        contribution_specification=contributions,
    )
def isomers_c9h10n2o2pf2cl() -> GoalDirectedBenchmark:
    """
    Build the benchmark that asks for 100 isomers of C9H10N2O2PF2Cl.

    Returns:
        A GoalDirectedBenchmark named 'C9H10N2O2PF2Cl' whose objective is the
        isomer score for that formula.
    """
    formula = 'C9H10N2O2PF2Cl'
    isomer_objective = IsomerScoringFunction(formula)
    contributions = uniform_specification(100)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contributions,
    )
def isomers_c11h24() -> GoalDirectedBenchmark:
    """
    Build the benchmark that asks for every C11H24 molecule.

    There should be 159 of them when stereochemistry is ignored, which is why
    the contribution specification is uniform_specification(159).

    Returns:
        A GoalDirectedBenchmark named 'C11H24' whose objective is the isomer
        score for that formula.
    """
    formula = 'C11H24'
    isomer_objective = IsomerScoringFunction(formula)
    contributions = uniform_specification(159)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contributions,
    )
def isomers_c9h10n2o2pf2cl(mean_function='geometric', n_samples=250) -> GoalDirectedBenchmark:
    """
    Build the benchmark that asks for isomers of C9H10N2O2PF2Cl.

    Args:
        mean_function: 'arithmetic' or 'geometric'; forwarded to
            IsomerScoringFunction.
        n_samples: value passed to uniform_specification (250 by default).

    Returns:
        A GoalDirectedBenchmark named 'C9H10N2O2PF2Cl' whose objective is the
        isomer score for that formula.
    """
    formula = 'C9H10N2O2PF2Cl'
    contributions = uniform_specification(n_samples)
    isomer_objective = IsomerScoringFunction(formula, mean_function=mean_function)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contributions,
    )
def isomers_c7h8n2o2(mean_function='geometric') -> GoalDirectedBenchmark:
    """
    Build the benchmark that asks for 100 isomers of C7H8N2O2.

    Args:
        mean_function: 'arithmetic' or 'geometric'; forwarded to
            IsomerScoringFunction.

    Returns:
        A GoalDirectedBenchmark named 'C7H8N2O2' whose objective is the
        isomer score for that formula.
    """
    formula = 'C7H8N2O2'
    contributions = uniform_specification(100)
    isomer_objective = IsomerScoringFunction(formula, mean_function=mean_function)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contributions,
    )
def isomers_c11h24(mean_function='geometric') -> GoalDirectedBenchmark:
    """
    Build the benchmark that asks for every C11H24 molecule.

    There should be 159 of them when stereochemistry is ignored, which is why
    the contribution specification is uniform_specification(159).

    Args:
        mean_function: 'arithmetic' or 'geometric'; forwarded to
            IsomerScoringFunction.

    Returns:
        A GoalDirectedBenchmark named 'C11H24' whose objective is the isomer
        score for that formula.
    """
    formula = 'C11H24'
    contributions = uniform_specification(159)
    isomer_objective = IsomerScoringFunction(formula, mean_function=mean_function)

    return GoalDirectedBenchmark(
        name=formula,
        objective=isomer_objective,
        contribution_specification=contributions,
    )
def test_isomer_scoring_function_returns_one_for_correct_molecule():
    """Isomers matching C11H24 must score exactly 1.0 under both mean functions."""
    scorer_arithmetic = IsomerScoringFunction('C11H24', mean_function='arithmetic')
    scorer_geometric = IsomerScoringFunction('C11H24', mean_function='geometric')

    # every candidate below fits the formula C11H24
    matching_isomers = (
        'CCCCCCCCCCC',
        'CC(CCC)CCCCCC',
        'CCCCC(CC(C)CC)C',
    )

    # arithmetic scorer is checked on all candidates first, then the geometric one
    for scorer in (scorer_arithmetic, scorer_geometric):
        for candidate in matching_isomers:
            assert scorer.score(candidate) == 1.0
def test_isomer_scoring_function_uses_geometric_mean_by_default():
    """Without an explicit mean_function, the scorer's mean_function is geometric_mean."""
    default_scorer = IsomerScoringFunction('C2H4')
    chosen_mean = default_scorer.mean_function

    assert chosen_mean == geometric_mean
def test_isomer_scoring_function_invalid_molecule():
    """Scoring the string 'CCCinvalid' must yield the scorer's corrupt_score."""
    scorer = IsomerScoringFunction('C60')

    observed = scorer.score('CCCinvalid')

    assert observed == scorer.corrupt_score