def sitagliptin_replacement() -> GoalDirectedBenchmark:
    """
    Build the "Sitagliptin MPO" goal-directed benchmark.

    The goal is a molecule that is dissimilar to sitagliptin while sharing
    its properties. The objective is the geometric mean of four scores:
    ECFP4 Tanimoto similarity passed through a Gaussian centred on 0, logP
    and TPSA Gaussians centred on the values measured on sitagliptin itself,
    and an isomer score for the formula C16H15F6N5O.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    sitagliptin_smiles = "Fc1cc(c(F)cc1F)CC(N)CC(=O)N3Cc2nnc(n2CC3)C(F)(F)F"
    reference_mol = Chem.MolFromSmiles(sitagliptin_smiles)

    # Property targets are read off the reference molecule.
    reference_logp = logP(reference_mol)
    reference_tpsa = tpsa(reference_mol)

    components = [
        # mu=0 centres the similarity modifier on zero similarity.
        TanimotoScoringFunction(
            sitagliptin_smiles,
            fp_type="ECFP4",
            score_modifier=GaussianModifier(mu=0, sigma=0.1),
        ),
        RdkitScoringFunction(
            descriptor=logP,
            score_modifier=GaussianModifier(mu=reference_logp, sigma=0.2),
        ),
        RdkitScoringFunction(
            descriptor=tpsa,
            score_modifier=GaussianModifier(mu=reference_tpsa, sigma=5),
        ),
        IsomerScoringFunction("C16H15F6N5O"),
    ]

    return GoalDirectedBenchmark(
        name="Sitagliptin MPO",
        objective=GeometricMeanScoringFunction(components),
        contribution_specification=uniform_specification(1, 10, 100),
    )
def smarts_with_other_target(smarts: str, other_molecule: str) -> ScoringFunction:
    """
    Combine a SMARTS score with property targets taken from another molecule.

    Args:
        smarts: SMARTS pattern handed to SMARTSScoringFunction as its target.
        other_molecule: SMILES of the molecule whose logP, TPSA and Bertz
            values become the centres of the Gaussian modifiers.

    Returns:
        Geometric mean of the SMARTS score and the three property scores.
    """
    components = [SMARTSScoringFunction(target=smarts)]
    reference_mol = Chem.MolFromSmiles(other_molecule)

    # (descriptor, Gaussian width) for each property matched on the reference.
    property_widths = ((logP, 0.2), (tpsa, 5), (bertz, 30))
    for descriptor, width in property_widths:
        centre = descriptor(reference_mol)
        components.append(
            RdkitScoringFunction(
                descriptor=descriptor,
                score_modifier=GaussianModifier(mu=centre, sigma=width),
            )
        )

    return GeometricMeanScoringFunction(components)
def weird_physchem() -> GoalDirectedBenchmark:
    """
    Build the "Physchem MPO" goal-directed benchmark.

    The objective is the arithmetic mean of four descriptor scores, based on
    `bertz`, `mol_weight`, `num_aromatic_rings` and the fluorine atom count.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    # NOTE(review): Max/MinGaussianModifier are presumably one-sided Gaussians
    # (full score above / below mu respectively) - confirm in their definition.
    components = [
        RdkitScoringFunction(
            descriptor=bertz,
            score_modifier=MaxGaussianModifier(mu=1500, sigma=200),
        ),
        RdkitScoringFunction(
            descriptor=mol_weight,
            score_modifier=MinGaussianModifier(mu=400, sigma=40),
        ),
        RdkitScoringFunction(
            descriptor=num_aromatic_rings,
            score_modifier=MinGaussianModifier(mu=3, sigma=1),
        ),
        RdkitScoringFunction(
            descriptor=AtomCounter('F'),
            score_modifier=GaussianModifier(mu=6, sigma=1.0),
        ),
    ]
    objective = ArithmeticMeanScoringFunction(components)

    return GoalDirectedBenchmark(
        name='Physchem MPO',
        objective=objective,
        contribution_specification=uniform_specification(1, 10, 100),
    )
def start_pop_ranolazine() -> GoalDirectedBenchmark:
    """
    Build the "Ranolazine MPO" benchmark seeded with ranolazine itself.

    The objective is the arithmetic mean of: clipped atom-pair ('AP')
    Tanimoto similarity to ranolazine, a logP score, a fluorine-count score
    and an aromatic-ring-count score. Ranolazine is supplied as the only
    member of the starting population.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    ranolazine_smiles = 'COc1ccccc1OCC(O)CN2CCN(CC(=O)Nc3c(C)cccc3C)CC2'

    similarity_score = TanimotoScoringFunction(
        ranolazine_smiles,
        fp_type='AP',
        score_modifier=ClippedScoreModifier(upper_x=0.7),
    )
    # NOTE(review): upstream naming described this term as "logP under 4",
    # yet the modifier is MaxGaussianModifier(mu=7) - confirm which is intended.
    logp_score = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=MaxGaussianModifier(mu=7, sigma=1),
    )
    aromatic_ring_score = RdkitScoringFunction(
        descriptor=num_aromatic_rings,
        score_modifier=MinGaussianModifier(mu=1, sigma=1),
    )
    fluorine_score = RdkitScoringFunction(
        descriptor=AtomCounter('F'),
        score_modifier=GaussianModifier(mu=1, sigma=1.0),
    )

    objective = ArithmeticMeanScoringFunction(
        [similarity_score, logp_score, fluorine_score, aromatic_ring_score])

    return GoalDirectedBenchmark(
        name='Ranolazine MPO',
        objective=objective,
        contribution_specification=uniform_specification(1, 10, 100),
        starting_population=[ranolazine_smiles],
    )
def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """
    C11H24O molecules scored against C12H24 lose only the carbon-count term.

    Checked for both the arithmetic and the geometric mean variants.
    """
    target_formula = 'C12H24'
    arithmetic_scorer = IsomerScoringFunction(target_formula,
                                              mean_function='arithmetic')
    geometric_scorer = IsomerScoringFunction(target_formula,
                                             mean_function='geometric')

    # Each of these SMILES has the formula C11H24O: one carbon short of the
    # target, with the expected hydrogen count.
    c11h24o_smiles = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # The carbon count is off by one, i.e. a deviation of 1.0 under the
    # Gaussian modifier; the other two components are expected to be perfect.
    carbon_component = GaussianModifier(mu=0, sigma=1)(1.0)
    total_atoms_component = 1.0
    hydrogen_component = 1.0

    arithmetic_expected = (
        total_atoms_component + carbon_component + hydrogen_component) / 3.0
    geometric_expected = (
        total_atoms_component * carbon_component * hydrogen_component)**(1 / 3)

    for smiles in c11h24o_smiles:
        assert arithmetic_scorer.score(smiles) == pytest.approx(
            arithmetic_expected)

    for smiles in c11h24o_smiles:
        assert geometric_scorer.score(smiles) == pytest.approx(
            geometric_expected)
def ranolazine_mpo() -> GoalDirectedBenchmark:
    """
    Make start_pop_ranolazine more polar and add a fluorine.

    The objective is the geometric mean of: clipped atom-pair ("AP")
    Tanimoto similarity to ranolazine, a logP score, a fluorine-count score
    and a TPSA score. Ranolazine is supplied as the only member of the
    starting population.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    ranolazine_smiles = "COc1ccccc1OCC(O)CN2CCN(CC(=O)Nc3c(C)cccc3C)CC2"

    similarity_score = TanimotoScoringFunction(
        ranolazine_smiles,
        fp_type="AP",
        score_modifier=ClippedScoreModifier(upper_x=0.7),
    )
    # NOTE(review): upstream naming described this term as "logP under 4",
    # yet the modifier is MaxGaussianModifier(mu=7) - confirm which is intended.
    logp_score = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=MaxGaussianModifier(mu=7, sigma=1),
    )
    polarity_score = RdkitScoringFunction(
        descriptor=tpsa,
        score_modifier=MaxGaussianModifier(mu=95, sigma=20),
    )
    fluorine_score = RdkitScoringFunction(
        descriptor=AtomCounter("F"),
        score_modifier=GaussianModifier(mu=1, sigma=1.0),
    )

    objective = GeometricMeanScoringFunction(
        [similarity_score, logp_score, fluorine_score, polarity_score])

    return GoalDirectedBenchmark(
        name="Ranolazine MPO",
        objective=objective,
        contribution_specification=uniform_specification(1, 10, 100),
        starting_population=[ranolazine_smiles],
    )
def pioglitazone_mpo() -> GoalDirectedBenchmark:
    """
    Build the "Pioglitazone MPO" goal-directed benchmark.

    Targets pioglitazone with the same molecular weight but fewer rotatable
    bonds. The objective is the geometric mean of an ECFP4 Tanimoto
    similarity score (Gaussian centred on 0), a molecular-weight score
    centred on pioglitazone's own weight, and a rotatable-bond score centred
    on 2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    pioglitazone_smiles = 'O=C1NC(=O)SC1Cc3ccc(OCCc2ncc(cc2)CC)cc3'
    reference_weight = mol_weight(Chem.MolFromSmiles(pioglitazone_smiles))

    similarity_score = TanimotoScoringFunction(
        pioglitazone_smiles,
        fp_type='ECFP4',
        score_modifier=GaussianModifier(mu=0, sigma=0.1),
    )
    weight_score = RdkitScoringFunction(
        descriptor=mol_weight,
        score_modifier=GaussianModifier(mu=reference_weight, sigma=10),
    )
    rotatable_bond_score = RdkitScoringFunction(
        descriptor=num_rotatable_bonds,
        score_modifier=GaussianModifier(mu=2, sigma=0.5),
    )

    objective = GeometricMeanScoringFunction(
        [similarity_score, weight_score, rotatable_bond_score])

    return GoalDirectedBenchmark(
        name='Pioglitazone MPO',
        objective=objective,
        contribution_specification=uniform_specification(1, 10, 100),
    )
def test_gaussian_function():
    """
    GaussianModifier must equal 1.0 at its centre and agree with the
    reference `gaussian` helper on both the scalar and the array fixture.
    """
    centre, width = -1.223, 0.334
    modifier = GaussianModifier(mu=centre, sigma=width)

    # Evaluated exactly at mu, the modifier yields exactly 1.0.
    assert modifier(centre) == 1.0

    # Scalar input: exact agreement with the reference implementation.
    assert modifier(scalar_value) == gaussian(scalar_value, centre, width)

    # Array input: element-wise agreement within np.allclose tolerances.
    actual_values = modifier(value_array)
    reference_values = gaussian(value_array, centre, width)
    assert np.allclose(actual_values, reference_values)
def tpsa_benchmark(target: float) -> GoalDirectedBenchmark:
    """
    Build a benchmark whose objective is a Gaussian TPSA score.

    Args:
        target: centre (mu) of the Gaussian modifier applied to the `tpsa`
            descriptor; the width is fixed at sigma=20.0.

    Returns:
        A GoalDirectedBenchmark named after the target value.
    """
    tpsa_score = RdkitScoringFunction(
        descriptor=tpsa,
        score_modifier=GaussianModifier(mu=target, sigma=20.0),
    )
    return GoalDirectedBenchmark(
        name=f'TPSA (target: {target})',
        objective=tpsa_score,
        contribution_specification=uniform_specification(1, 10, 100),
    )
def __init__(self, molecular_formula: str) -> None:
    """
    Create one scoring function per element plus one for the total atom count.

    Args:
        molecular_formula: target molecular formula
    """
    super().__init__()

    element_counts = parse_molecular_formula(molecular_formula)
    atom_total = sum(count for _, count in element_counts)

    scorers = []

    # One Gaussian (sigma=1.0) per element, centred on its target count.
    for element, n_atoms in element_counts:
        per_element_modifier = GaussianModifier(mu=n_atoms, sigma=1.0)
        scorers.append(
            RdkitScoringFunction(descriptor=AtomCounter(element),
                                 score_modifier=per_element_modifier))

    # A wider Gaussian (sigma=2.0) on the total number of atoms.
    total_modifier = GaussianModifier(mu=atom_total, sigma=2.0)
    scorers.append(
        RdkitScoringFunction(descriptor=num_atoms,
                             score_modifier=total_modifier))

    self.functions = scorers
def amlodipine_rings() -> GoalDirectedBenchmark:
    """
    Build the "Amlodipine MPO" benchmark: amlodipine with 3 rings.

    The objective is the geometric mean of ECFP4 Tanimoto similarity to
    amlodipine and a Gaussian score on `num_rings` centred on 3.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    # Raw string: the SMILES contains a backslash bond marker.
    amlodipine_smiles = r'Clc1ccccc1C2C(=C(/N/C(=C2/C(=O)OCC)COCCN)C)\C(=O)OC'

    similarity_score = TanimotoScoringFunction(amlodipine_smiles,
                                               fp_type='ECFP4')
    ring_count_score = RdkitScoringFunction(
        descriptor=num_rings,
        score_modifier=GaussianModifier(mu=3, sigma=0.5),
    )
    objective = GeometricMeanScoringFunction(
        [similarity_score, ring_count_score])

    return GoalDirectedBenchmark(
        name='Amlodipine MPO',
        objective=objective,
        contribution_specification=uniform_specification(1, 10, 100),
    )
def perindopril_rings() -> GoalDirectedBenchmark:
    """
    Build the "Perindopril MPO" benchmark: perindopril with two aromatic rings.

    The objective is the geometric mean of ECFP4 Tanimoto similarity to
    perindopril and a Gaussian score on `num_aromatic_rings` centred on 2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    perindopril_smiles = 'O=C(OCC)C(NC(C(=O)N1C(C(=O)O)CC2CCCCC12)C)CCC'

    similarity_score = TanimotoScoringFunction(perindopril_smiles,
                                               fp_type='ECFP4')
    aromatic_ring_score = RdkitScoringFunction(
        descriptor=num_aromatic_rings,
        score_modifier=GaussianModifier(mu=2, sigma=0.5),
    )
    objective = GeometricMeanScoringFunction(
        [similarity_score, aromatic_ring_score])

    return GoalDirectedBenchmark(
        name='Perindopril MPO',
        objective=objective,
        contribution_specification=uniform_specification(1, 10, 100),
    )
def determine_scoring_functions(
        molecular_formula: str) -> List[RdkitScoringFunction]:
    """
    Translate a molecular formula into its list of scoring functions.

    Args:
        molecular_formula: target molecular formula

    Returns:
        One scoring function per element in the formula (Gaussian on that
        element's atom count, sigma=1.0), followed by one scoring function on
        the total number of atoms (Gaussian, sigma=2.0).
    """

    def gaussian_scorer(descriptor, centre, width):
        # Wrap a descriptor in a Gaussian modifier centred on `centre`.
        return RdkitScoringFunction(
            descriptor=descriptor,
            score_modifier=GaussianModifier(mu=centre, sigma=width))

    parsed_formula = parse_molecular_formula(molecular_formula)
    atom_total = sum(count for _, count in parsed_formula)

    scorers = []
    for element, n_atoms in parsed_formula:
        scorers.append(gaussian_scorer(AtomCounter(element), n_atoms, 1.0))

    # The total-atom term always comes last.
    scorers.append(gaussian_scorer(num_atoms, atom_total, 2.0))
    return scorers
def test_isomer_scoring_function_penalizes_incorrect_number_atoms():
    """
    C11H24O molecules scored against C12H24 lose only the carbon-count term,
    which lowers the expected score by one third of that term's penalty.
    """
    scorer = IsomerScoringFunction('C12H24')

    # Each of these SMILES has the formula C11H24O: one carbon short of the
    # target formula.
    c11h24o_smiles = (
        'CCCCCCCCOCCC',
        'CC(CCOC)CCCCCC',
        'COCCCC(CC(C)CC)C',
    )

    # The penalty is what a deviation of 1.0 costs under the Gaussian modifier
    # on the number of C atoms; it is divided by 3.0 in the expected score.
    carbon_penalty = 1.0 - GaussianModifier(mu=0, sigma=1)(1.0)
    expected = 1.0 - carbon_penalty / 3.0

    for smiles in c11h24o_smiles:
        assert scorer.score(smiles) == pytest.approx(expected)