) logger.info("LanguageModelMCTSWithPUCTTerminating") logger.info("TanimotoScorer(abilify, radius=6)") logger.info("num_iterations = 100") logger.info("simulations_per_iteration = 50000") logger.info("keep_top_n = 5000") logger.info("loading language model...") vocab = get_arpa_vocab( '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.arpa') lm = KenLMDeepSMILESLanguageModel( '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab) abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl" scorer = TanimotoScorer(abilify, radius=6) converter = Converter(rings=True, branches=True) env = os.environ.copy() env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"] lm_trainer = KenLMTrainer(env) def log_best(j, all_best, n_valid, lggr): if j % 1000 == 0: lggr.info("--iteration: %d--" % j) lggr.info("num valid: %d" % n_valid) log_top_best(all_best, 5, lggr) def smiles_to_deepsmiles(smiles):
# Record the script name and the run configuration in the log.
logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("width = 12, max_depth = 50, start_state = ['<s>'], c = 100")
logger.info("score: -1.0 if invalid; -1.0 if seen in all smiles; tanimoto distance from abilify if valid")
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000")

# Vocabulary and language model are read from the matching .arpa / .klm pair.
vocab = get_arpa_vocab(
    "../models/chembl_25_deepsmiles_klm_10gram_200503.arpa"
)
lm = KenLMDeepSMILESLanguageModel(
    "../models/chembl_25_deepsmiles_klm_10gram_200503.klm",
    vocab,
)

# Reference molecule string handed to the Tanimoto scorer.
abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)
cycle_scorer = CycleScorer()
converter = Converter(rings=True, branches=True)

# The trainer gets a private copy of the environment whose PATH starts with
# the local KenLM build directory.
env = dict(os.environ)
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    """Return ``converter.encode`` applied to the canonical form of ``smiles``.

    ``smiles`` is parsed by pybel as "smi", written back out in "can" format,
    stripped of surrounding whitespace and then encoded with the module-level
    ``converter``.
    """
    molecule = pybel.readstring("smi", smiles)
    canonical_smiles = molecule.write("can").strip()
    return converter.encode(canonical_smiles)


logger.info("deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
) logger.info("LanguageModelMCTSWithPUCTTerminating") logger.info( "TanimotoScorer(abilify, radius=6); distance only (no SA or cycle scoring)" ) logger.info("num_iterations = 100") logger.info("time per iteration = 45 min.") logger.info("keep_top_n = 20000 unique") vocab = get_arpa_vocab( '../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa') lm = KenLMDeepSMILESLanguageModel( '../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab) abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl" distance_scorer = TanimotoScorer(abilify, radius=6) converter = Converter(rings=True, branches=True) env = os.environ.copy() env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"] lm_trainer = KenLMTrainer(env) def smiles_to_deepsmiles(smiles): canonical = pybel.readstring("smi", smiles).write("can").strip() return converter.encode(canonical) logger.info( "deleting any existing molexit directory, and creating a new one...") path = Path("../models/molexit/")
logger = get_logger("chemgrams.log")
# Absolute directory containing this script.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))

# Record the script name and the run configuration in the log.
logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000")

# Vocabulary and language model are read from the matching .arpa / .klm pair.
vocab = get_arpa_vocab(
    "../models/chembl_25_deepsmiles_klm_10gram_200503.arpa"
)
lm = KenLMDeepSMILESLanguageModel(
    "../models/chembl_25_deepsmiles_klm_10gram_200503.klm",
    vocab,
)

# Reference molecule string handed to the Tanimoto scorer.
abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)
cycle_scorer = CycleScorer()
converter = Converter(rings=True, branches=True)

# The trainer gets a private copy of the environment whose PATH starts with
# the local KenLM build directory.
env = dict(os.environ)
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    """Return ``converter.encode`` applied to the canonical form of ``smiles``.

    ``smiles`` is parsed by pybel as "smi", written back out in "can" format,
    stripped of surrounding whitespace and then encoded with the module-level
    ``converter``.
    """
    molecule = pybel.readstring("smi", smiles)
    canonical_smiles = molecule.write("can").strip()
    return converter.encode(canonical_smiles)


logger.info("deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
"KenLMDeepSMILESLanguageModel('../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab)" ) logger.info("TanimotoScorer(abilify, radius=6)") logger.info("num_iterations = 100") logger.info("attempts_per_iteration = 400000") logger.info("keep_top_n = 20000") logger.info("loading language model...") vocab = get_arpa_vocab( '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.arpa') lm = KenLMDeepSMILESLanguageModel( '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab) abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl" distance_scorer = TanimotoScorer(abilify, radius=6) cycle_scorer = CycleScorer() converter = Converter(rings=True, branches=True) env = os.environ.copy() env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"] lm_trainer = KenLMTrainer(env) def smiles_to_deepsmiles(smiles): canonical = pybel.readstring("smi", smiles).write("can").strip() return converter.encode(canonical) logger.info(
logger.info("KenLMDeepSMILESLanguageModel('../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.klm', vocab)") logger.info("width = 12, max_depth = 35, start_state = ['<s>'], c = 5") logger.info("score: -1.0 if invalid; -1.0 if seen previously; tanimoto distance from abilify if valid") logger.info("LanguageModelMCTSWithPUCTTerminating") logger.info("TanimotoScorer(abilify)") logger.info("num_iterations = 15") logger.info("simulations_per_iteration = 50000") logger.info("keep_top_n = 5000") logger.info("loading language model...") vocab = get_arpa_vocab('../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.arpa') lm = KenLMDeepSMILESLanguageModel('../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.klm', vocab) abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl" scorer = TanimotoScorer(abilify) converter = Converter(rings=True, branches=True) env = os.environ.copy() env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"] lm_trainer = KenLMTrainer(env) def log_best(j, all_best, n_valid, lggr): if j % 1000 == 0: lggr.info("--iteration: %d--" % j) lggr.info("num valid: %d" % n_valid) log_top_best(all_best, 5, lggr) def smiles_to_deepsmiles(smiles):
logger = get_logger("chemgrams.log")
# Absolute directory containing this script.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))

# Record the script name and the run configuration in the log.
logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("TanimotoScorer(celecoxib, radius=6); distance only (no SA or cycle scoring)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000 of all (including duplicates)")

# Vocabulary and language model are read from the matching .arpa / .klm pair.
vocab = get_arpa_vocab(
    "../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa"
)
lm = KenLMDeepSMILESLanguageModel(
    "../resources/chembl_25_deepsmiles_klm_10gram_200503.klm",
    vocab,
)

# Reference molecule string handed to the Tanimoto scorer.
celecoxib = "O=S(=O)(c3ccc(n1nc(cc1c2ccc(cc2)C)C(F)(F)F)cc3)N"
distance_scorer = TanimotoScorer(celecoxib, radius=6)
converter = Converter(rings=True, branches=True)

# The trainer gets a private copy of the environment whose PATH starts with
# the local KenLM build directory.
env = dict(os.environ)
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    """Return ``converter.encode`` applied to the canonical form of ``smiles``.

    ``smiles`` is parsed by pybel as "smi", written back out in "can" format,
    stripped of surrounding whitespace and then encoded with the module-level
    ``converter``.
    """
    molecule = pybel.readstring("smi", smiles)
    canonical_smiles = molecule.write("can").strip()
    return converter.encode(canonical_smiles)


logger.info("deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
# isdir() is already False for a missing path, so it covers the existence test.
if os.path.isdir(path):
    shutil.rmtree(path)
# Record the run configuration in the log.
logger.info(
    "TanimotoScorer(celecoxib, radius=6); distance only (no SA or cycle scoring)"
)
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 200000 of all (including duplicates)")

# Vocabulary and language model are read from the matching .arpa / .klm pair.
vocab = get_arpa_vocab(
    "../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa"
)
lm = KenLMDeepSMILESLanguageModel(
    "../resources/chembl_25_deepsmiles_klm_10gram_200503.klm",
    vocab,
)

# Disabled alternative target, kept for reference:
# abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
# distance_scorer = TanimotoScorer(abilify, radius=6)

# Reference molecule string handed to the Tanimoto scorer.
celecoxib = "O=S(=O)(c3ccc(n1nc(cc1c2ccc(cc2)C)C(F)(F)F)cc3)N"
distance_scorer = TanimotoScorer(celecoxib, radius=6)
converter = Converter(rings=True, branches=True)

# The trainer gets a private copy of the environment whose PATH starts with
# the local KenLM build directory.
env = dict(os.environ)
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    """Return ``converter.encode`` applied to the canonical form of ``smiles``.

    ``smiles`` is parsed by pybel as "smi", written back out in "can" format,
    stripped of surrounding whitespace and then encoded with the module-level
    ``converter``.
    """
    molecule = pybel.readstring("smi", smiles)
    canonical_smiles = molecule.write("can").strip()
    return converter.encode(canonical_smiles)


logger.info(
    "deleting any existing molexit directory, and creating a new one..."
)
path = Path("../models/molexit/")
# Record the script name and the run configuration in the log.
logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("width = 12, max_depth = 50, start_state = ['<s>'], c = 2")
logger.info("score: -1.0 if invalid; -1.0 if seen in iteration; tanimoto distance from abilify if valid; rescaling from [0,1] to [-1,1]")
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info("TanimotoScorer(abilify, radius=6); distance only (no SA or cycle scoring)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep all valid in current iteration (including duplicates)")

# Vocabulary and language model are read from the matching .arpa / .klm pair.
vocab = get_arpa_vocab(
    "../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa"
)
lm = KenLMDeepSMILESLanguageModel(
    "../resources/chembl_25_deepsmiles_klm_10gram_200503.klm",
    vocab,
)

# Reference molecule string handed to both scorers below.
abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)
query_scorer = TanimotoScorer(abilify, radius=6)  # QueryScorer(abilify, radius=6, k=1.0)

# Disabled alternatives, kept for reference:
# query_scorer = QueryScorer(abilify, k=1.0)  # roughly equivalent to FCFP4
# celecoxib = "O=S(=O)(c3ccc(n1nc(cc1c2ccc(cc2)C)C(F)(F)F)cc3)N"
# distance_scorer = TanimotoScorer(celecoxib, radius=6)

converter = Converter(rings=True, branches=True)

# The trainer gets a private copy of the environment whose PATH starts with
# the local KenLM build directory.
env = dict(os.environ)
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    """Return ``converter.encode`` applied to the canonical form of ``smiles``.

    ``smiles`` is parsed by pybel as "smi", written back out in "can" format,
    stripped of surrounding whitespace and then encoded with the module-level
    ``converter``.
    """
    molecule = pybel.readstring("smi", smiles)
    canonical_smiles = molecule.write("can").strip()
    return converter.encode(canonical_smiles)
) logger.info("LanguageModelMCTSWithPUCTTerminating") logger.info("TanimotoScorer(abilify, radius=6)") logger.info("num_iterations = 100") logger.info("simulations_per_iteration = 50000") logger.info("keep_top_n = 5000") logger.info("loading language model...") vocab = get_arpa_vocab( '../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.arpa') lm = KenLMDeepSMILESLanguageModel( '../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.klm', vocab) abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl" distance_scorer = TanimotoScorer(abilify, radius=6) cycle_scorer = CycleScorer() converter = Converter(rings=True, branches=True) env = os.environ.copy() env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"] lm_trainer = KenLMTrainer(env) def log_best(j, all_best, n_valid, lggr): if j % 10000 == 0: lggr.info("--iteration: %d--" % j) lggr.info("num valid: %d" % n_valid) log_top_best(all_best, 5, lggr)