# Describe this run in the log. These are fixed strings; they are not read
# from the variables configured below.
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("attempts_per_iteration = 400000")
logger.info("keep_top_n = 20000")

logger.info("loading language model...")
vocab = get_arpa_vocab(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab)

# Reference molecule handed to the Tanimoto scorer.
abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)
cycle_scorer = CycleScorer()
converter = Converter(rings=True, branches=True)

# Copy of the process environment with a KenLM build directory prepended to
# PATH; the copy is handed to the trainer.
# NOTE(review): the prepended directory is a hard-coded, user-specific path.
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    """Encode *smiles* with the module-level ``converter``.

    The input is parsed by pybel as "smi", written back out in pybel's
    "can" format, stripped of surrounding whitespace, and the result is
    passed to ``converter.encode``.
    """
    molecule = pybel.readstring("smi", smiles)
    canonical_smiles = molecule.write("can").strip()
    return converter.encode(canonical_smiles)


logger.info(
    "deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
"score: -1.0 if invalid; -1.0 if seen previously; tanimoto distance from abilify if valid" ) logger.info("LanguageModelMCTSWithPUCTTerminating") logger.info("TanimotoScorer(abilify, radius=6)") logger.info("num_iterations = 300") logger.info("simulations_per_iteration = 100000") logger.info("keep_top_n = 10000") logger.info("loading language model...") lm = EmptyDeepSMILESLanguageModel(vocab, n=6) abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl" distance_scorer = TanimotoScorer(abilify, radius=6) cycle_scorer = CycleScorer() converter = Converter(rings=True, branches=True) env = os.environ.copy() env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"] lm_trainer = KenLMTrainer(env) def log_best(j, all_best, n_valid, lggr): if j % 10000 == 0: lggr.info("--iteration: %d--" % j) lggr.info("num valid: %d" % n_valid) log_top_best(all_best, 5, lggr) def smiles_to_deepsmiles(smiles):
THIS_DIR = os.path.dirname(os.path.abspath(__file__))

# Describe this run in the log: the script's file name followed by fixed
# strings (they are not read from the variables configured below).
logger.info(os.path.basename(__file__))
logger.info(
    "KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)"
)
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000")

vocab = get_arpa_vocab('../models/chembl_25_deepsmiles_klm_10gram_200503.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)

# Reference molecule handed to the Tanimoto scorer.
abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)
cycle_scorer = CycleScorer()
converter = Converter(rings=True, branches=True)

# Copy of the process environment with a KenLM build directory prepended to
# PATH; the copy is handed to the trainer.
# NOTE(review): the prepended directory is a hard-coded, user-specific path.
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    """Encode *smiles* with the module-level ``converter``.

    The input is parsed by pybel as "smi", written back out in pybel's
    "can" format, stripped of surrounding whitespace, and the result is
    passed to ``converter.encode``.
    """
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)


logger.info("deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
# os.path.isdir() is already False for a path that does not exist, so a
# separate os.path.exists() check is redundant.
if os.path.isdir(path):
    shutil.rmtree(path)