def ranolazine_mpo() -> GoalDirectedBenchmark:
    """
    Build the "Ranolazine MPO" goal-directed benchmark.

    Stated goal: make ranolazine more polar and add a fluorine.

    The objective is a GeometricMeanScoringFunction over four terms:
    a Tanimoto term against ranolazine (fp_type "AP"), a logP term,
    a fluorine atom-count term and a TPSA term. Ranolazine itself is
    passed as the starting population.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    ranolazine = "COc1ccccc1OCC(O)CN2CCN(CC(=O)Nc3c(C)cccc3C)CC2"

    similarity_term = TanimotoScoringFunction(
        ranolazine,
        fp_type="AP",
        score_modifier=ClippedScoreModifier(upper_x=0.7),
    )

    # NOTE(review): this local used to be called "logP_under_4", yet the
    # modifier is parameterised with mu=7 -- confirm which value is intended.
    logp_term = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=MaxGaussianModifier(mu=7, sigma=1),
    )

    tpsa_term = RdkitScoringFunction(
        descriptor=tpsa,
        score_modifier=MaxGaussianModifier(mu=95, sigma=20),
    )

    # Gaussian centred on exactly one fluorine atom.
    fluorine_term = RdkitScoringFunction(
        descriptor=AtomCounter("F"),
        score_modifier=GaussianModifier(mu=1, sigma=1.0),
    )

    objective = GeometricMeanScoringFunction(
        [similarity_term, logp_term, fluorine_term, tpsa_term]
    )

    return GoalDirectedBenchmark(
        name="Ranolazine MPO",
        objective=objective,
        contribution_specification=uniform_specification(1, 10, 100),
        starting_population=[ranolazine],
    )
def sitagliptin_replacement() -> GoalDirectedBenchmark:
    """
    Build the "Sitagliptin MPO" goal-directed benchmark.

    Goal: find a molecule dissimilar to sitagliptin, but with the same
    properties. The logP and TPSA targets are computed from sitagliptin
    itself; the remaining terms are an ECFP4 Tanimoto term whose Gaussian
    modifier is centred at 0 and an isomer term for the formula
    C16H15F6N5O.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    sitagliptin_smiles = "Fc1cc(c(F)cc1F)CC(N)CC(=O)N3Cc2nnc(n2CC3)C(F)(F)F"
    reference_mol = Chem.MolFromSmiles(sitagliptin_smiles)

    # Property targets are taken from the reference molecule.
    reference_logp = logP(reference_mol)
    reference_tpsa = tpsa(reference_mol)

    dissimilarity_term = TanimotoScoringFunction(
        sitagliptin_smiles,
        fp_type="ECFP4",
        score_modifier=GaussianModifier(mu=0, sigma=0.1),
    )
    logp_term = RdkitScoringFunction(
        descriptor=logP,
        score_modifier=GaussianModifier(mu=reference_logp, sigma=0.2),
    )
    tpsa_term = RdkitScoringFunction(
        descriptor=tpsa,
        score_modifier=GaussianModifier(mu=reference_tpsa, sigma=5),
    )
    isomer_term = IsomerScoringFunction("C16H15F6N5O")

    contribution_spec = uniform_specification(1, 10, 100)
    objective = GeometricMeanScoringFunction(
        [dissimilarity_term, logp_term, tpsa_term, isomer_term]
    )

    return GoalDirectedBenchmark(
        name="Sitagliptin MPO",
        objective=objective,
        contribution_specification=contribution_spec,
    )
def test_geometric_mean_scoring_function():
    """
    Check GeometricMeanScoringFunction against two mock scoring functions.

    For each position the expected score is sqrt(v1 * v2) of the two mock
    values. Scores are compared with a tolerance instead of exact equality,
    because a geometric mean computed by a different but mathematically
    equivalent route (e.g. a power of 0.5) may differ from sqrt in the last
    bit.
    """
    from math import isclose

    mock_values_1 = [0.232, 0.665, 0.0, 1.0, 0.993]
    mock_values_2 = [0.010, 0.335, 0.8, 0.3, 0.847]

    mock_1 = MockScoringFunction(mock_values_1)
    mock_2 = MockScoringFunction(mock_values_2)

    scoring_function = GeometricMeanScoringFunction(scoring_functions=[mock_1, mock_2])

    smiles = ['CC'] * 5

    scores = scoring_function.score_list(smiles)
    expected = [sqrt(v1 * v2) for v1, v2 in zip(mock_values_1, mock_values_2)]

    # zip() would silently truncate, so pin the length first.
    assert len(scores) == len(expected)

    # abs_tol covers the entry whose expected value is exactly 0.0, where a
    # purely relative tolerance would demand bit-for-bit equality.
    for index, (actual, wanted) in enumerate(zip(scores, expected)):
        assert isclose(actual, wanted, rel_tol=1e-9, abs_tol=1e-12), (
            "score {} differs: got {!r}, expected {!r}".format(index, actual, wanted)
        )
def median_tadalafil_sildenafil() -> GoalDirectedBenchmark:
    """
    Build the "Median molecules 2" goal-directed benchmark.

    Goal: a median molecule between tadalafil and sildenafil. The objective
    is a GeometricMeanScoringFunction over two ECFP6 Tanimoto terms, one per
    reference molecule.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    tadalafil_term = TanimotoScoringFunction(
        'O=C1N(CC(N2C1CC3=C(C2C4=CC5=C(OCO5)C=C4)NC6=C3C=CC=C6)=O)C',
        fp_type='ECFP6',
    )
    sildenafil_term = TanimotoScoringFunction(
        'CCCC1=NN(C2=C1N=C(NC2=O)C3=C(C=CC(=C3)S(=O)(=O)N4CCN(CC4)C)OCC)C',
        fp_type='ECFP6',
    )

    objective = GeometricMeanScoringFunction([tadalafil_term, sildenafil_term])
    contribution_spec = uniform_specification(1, 10, 100)

    return GoalDirectedBenchmark(
        name='Median molecules 2',
        objective=objective,
        contribution_specification=contribution_spec,
    )
def zaleplon_with_other_formula() -> GoalDirectedBenchmark:
    """
    Build the "Zaleplon MPO" goal-directed benchmark.

    The objective is a GeometricMeanScoringFunction over an ECFP4 Tanimoto
    term against zaleplon and an isomer term for the formula C19H17N3O2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    similarity_term = TanimotoScoringFunction(
        'O=C(C)N(CC)C1=CC=CC(C2=CC=NC3=C(C=NN23)C#N)=C1',
        fp_type='ECFP4',
    )
    formula_term = IsomerScoringFunction('C19H17N3O2')

    contribution_spec = uniform_specification(1, 10, 100)
    objective = GeometricMeanScoringFunction([similarity_term, formula_term])

    return GoalDirectedBenchmark(
        name='Zaleplon MPO',
        objective=objective,
        contribution_specification=contribution_spec,
    )
def amlodipine_rings() -> GoalDirectedBenchmark:
    """
    Build the "Amlodipine MPO" goal-directed benchmark.

    Goal: amlodipine with 3 rings. The objective is a
    GeometricMeanScoringFunction over an ECFP4 Tanimoto term against
    amlodipine and a ring-count term whose Gaussian modifier is centred
    at 3.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    # Raw string: the SMILES contains a backslash bond-direction marker.
    similarity_term = TanimotoScoringFunction(
        r'Clc1ccccc1C2C(=C(/N/C(=C2/C(=O)OCC)COCCN)C)\C(=O)OC',
        fp_type='ECFP4',
    )
    ring_count_term = RdkitScoringFunction(
        descriptor=num_rings,
        score_modifier=GaussianModifier(mu=3, sigma=0.5),
    )

    contribution_spec = uniform_specification(1, 10, 100)
    objective = GeometricMeanScoringFunction([similarity_term, ring_count_term])

    return GoalDirectedBenchmark(
        name='Amlodipine MPO',
        objective=objective,
        contribution_specification=contribution_spec,
    )
def perindopril_rings() -> GoalDirectedBenchmark:
    """
    Build the "Perindopril MPO" goal-directed benchmark.

    Goal: perindopril with two aromatic rings. The objective is a
    GeometricMeanScoringFunction over an ECFP4 Tanimoto term against
    perindopril and an aromatic-ring-count term whose Gaussian modifier is
    centred at 2.

    Returns:
        The configured GoalDirectedBenchmark.
    """
    similarity_term = TanimotoScoringFunction(
        'O=C(OCC)C(NC(C(=O)N1C(C(=O)O)CC2CCCCC12)C)CCC',
        fp_type='ECFP4',
    )
    aromatic_ring_term = RdkitScoringFunction(
        descriptor=num_aromatic_rings,
        score_modifier=GaussianModifier(mu=2, sigma=0.5),
    )

    contribution_spec = uniform_specification(1, 10, 100)
    objective = GeometricMeanScoringFunction([similarity_term, aromatic_ring_term])

    return GoalDirectedBenchmark(
        name='Perindopril MPO',
        objective=objective,
        contribution_specification=contribution_spec,
    )
def smarts_with_other_target(smarts: str, other_molecule: str) -> ScoringFunction:
    """
    Combine a SMARTS term with property targets taken from another molecule.

    The logP, TPSA and Bertz targets are computed from `other_molecule`, and
    each becomes the centre (mu) of a Gaussian modifier on the matching
    descriptor. These three terms and the SMARTS term are combined with
    GeometricMeanScoringFunction.

    Args:
        smarts: SMARTS pattern passed to SMARTSScoringFunction as `target`.
        other_molecule: SMILES of the molecule whose logP, TPSA and Bertz
            values are used as targets.

    Returns:
        The combined scoring function.

    Raises:
        ValueError: if `other_molecule` cannot be parsed as SMILES.
    """
    smarts_scoring_function = SMARTSScoringFunction(target=smarts)

    other_mol = Chem.MolFromSmiles(other_molecule)
    if other_mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a clear message rather than inside a descriptor call.
        raise ValueError(
            'Cannot parse SMILES of the reference molecule: {!r}'.format(other_molecule)
        )

    target_logp = logP(other_mol)
    target_tpsa = tpsa(other_mol)
    target_bertz = bertz(other_mol)

    lp = RdkitScoringFunction(descriptor=logP,
                              score_modifier=GaussianModifier(mu=target_logp, sigma=0.2))
    tp = RdkitScoringFunction(descriptor=tpsa,
                              score_modifier=GaussianModifier(mu=target_tpsa, sigma=5))
    bz = RdkitScoringFunction(descriptor=bertz,
                              score_modifier=GaussianModifier(mu=target_bertz, sigma=30))

    return GeometricMeanScoringFunction([smarts_scoring_function, lp, tp, bz])
def pioglitazone_mpo() -> GoalDirectedBenchmark:
    """
    Build the "Pioglitazone MPO" goal-directed benchmark.

    Goal: pioglitazone with the same molecular weight but fewer rotatable
    bonds. The molecular-weight target is computed from pioglitazone itself.
    The objective is a GeometricMeanScoringFunction over an ECFP4 Tanimoto
    term (Gaussian modifier centred at 0), a molecular-weight term and a
    rotatable-bond term (Gaussian modifier centred at 2).

    Returns:
        The configured GoalDirectedBenchmark.
    """
    pioglitazone_smiles = 'O=C1NC(=O)SC1Cc3ccc(OCCc2ncc(cc2)CC)cc3'
    reference_mol = Chem.MolFromSmiles(pioglitazone_smiles)
    reference_weight = mol_weight(reference_mol)

    similarity_term = TanimotoScoringFunction(
        pioglitazone_smiles,
        fp_type='ECFP4',
        score_modifier=GaussianModifier(mu=0, sigma=0.1),
    )
    weight_term = RdkitScoringFunction(
        descriptor=mol_weight,
        score_modifier=GaussianModifier(mu=reference_weight, sigma=10),
    )
    rotatable_bond_term = RdkitScoringFunction(
        descriptor=num_rotatable_bonds,
        score_modifier=GaussianModifier(mu=2, sigma=0.5),
    )

    contribution_spec = uniform_specification(1, 10, 100)
    objective = GeometricMeanScoringFunction(
        [similarity_term, weight_term, rotatable_bond_term]
    )

    return GoalDirectedBenchmark(
        name='Pioglitazone MPO',
        objective=objective,
        contribution_specification=contribution_spec,
    )