def train_ex():
    # `agent`, `voc`, `OPT`, `params`, `BATCH_SIZE`, `root`, `alg`, `z` and the data
    # loaders are module-level globals set up elsewhere in the script.
    # Load the pre-trained agent and the fine-tuned prior (mutation network).
    agent.load_state_dict(torch.load(params['pr_path'] + '.pkg', map_location=utils.dev))
    prior = GraphModel(voc)
    prior.load_state_dict(torch.load(params['ft_path'] + '.pkg', map_location=utils.dev))

    # Explorer (RL trainer) and its exploration hyperparameters.
    evolver = GraphExplorer(agent, mutate=prior)
    evolver.batch_size = BATCH_SIZE
    evolver.epsilon = float(OPT.get('-e', '1e-2'))
    evolver.sigma = float(OPT.get('-b', '0.00'))
    evolver.scheme = OPT.get('-s', 'WS')
    evolver.repeat = 1

    # Environment: A2A affinity predictor and QED as objectives.
    keys = ['A2A', 'QED']
    A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
    QED = utils.Property('QED')
    objs = [A2A, QED]

    # Choose the desirability functions and thresholds per reward scheme.
    if evolver.scheme == 'WS':
        mod1 = utils.ClippedScore(lower_x=3, upper_x=10)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=1.0)
        ths = [0.5, 0]
    else:
        mod1 = utils.ClippedScore(lower_x=3, upper_x=6.5)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=1.0)
        ths = [0.99, 0]
    mods = [mod1, mod2]
    evolver.env = utils.Env(objs=objs, mods=mods, keys=keys, ths=ths)

    # Output prefix encodes algorithm, reward scheme and epsilon.
    evolver.out = root + '/%s_%s_%.0e' % (alg, evolver.scheme, evolver.epsilon)
    evolver.fit(train_loader, test_loader=valid_loader)
def rl_train():
    # Command-line options: -a algorithm, -e epsilon, -b sigma, -g GPUs,
    # -c objective case, -s reward scheme, -z predictor type.
    opts, args = getopt.getopt(sys.argv[1:], "a:e:b:g:c:s:z:")
    OPT = dict(opts)
    case = OPT.get('-c', 'OBJ1')
    z = OPT.get('-z', 'REG')
    alg = OPT.get('-a', 'smile')
    os.environ["CUDA_VISIBLE_DEVICES"] = OPT.get('-g', "0,1,2,3")

    # Vocabulary plus the pre-trained agent and fine-tuned prior (mutation network).
    voc = utils.VocSmiles(init_from_file="data/chembl_voc.txt", max_len=100)
    agent = GPT2Model(voc, n_layer=12)
    agent.load_state_dict(torch.load(params['pr_path'] + '.pkg', map_location=utils.dev))
    prior = GPT2Model(voc, n_layer=12)
    prior.load_state_dict(torch.load(params['ft_path'] + '.pkg', map_location=utils.dev))

    # Explorer (RL trainer) and its exploration hyperparameters.
    evolver = SmilesExplorer(agent, mutate=prior)
    evolver.batch_size = BATCH_SIZE
    evolver.epsilon = float(OPT.get('-e', '1e-2'))
    evolver.sigma = float(OPT.get('-b', '0.00'))
    evolver.scheme = OPT.get('-s', 'WS')
    evolver.repeat = 1

    # Environment: A2A affinity predictor and QED as objectives.
    keys = ['A2A', 'QED']
    A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
    QED = utils.Property('QED')
    objs = [A2A, QED]

    # Choose the desirability functions and thresholds per reward scheme.
    if evolver.scheme == 'WS':
        mod1 = utils.ClippedScore(lower_x=3, upper_x=10)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=1)
        ths = [0.5, 0]
    else:
        mod1 = utils.ClippedScore(lower_x=3, upper_x=6.5)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=0.5)
        ths = [0.99, 0]
    mods = [mod1, mod2]
    evolver.env = utils.Env(objs=objs, mods=mods, keys=keys, ths=ths)

    # Time-stamped output folder; keep a copy of the scripts for reproducibility.
    root = 'output/%s_%s' % (alg, time.strftime('%y%m%d_%H%M%S', time.localtime()))
    os.mkdir(root)
    copy2(alg + '_ex.py', root)
    copy2(alg + '.py', root)

    evolver.out = root + '/%s_%s_%s_%s_%.0e' % (alg, evolver.scheme, z, case, evolver.epsilon)
    evolver.fit(data_loader, test_loader=test_loader)
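# Hypothetical entry point (not part of the original snippet): rl_train() only reads
# sys.argv through getopt("a:e:b:g:c:s:z:"), so it could be invoked from a script
# named, say, smile_ex.py as
#
#     python smile_ex.py -a smile -s WS -e 1e-2 -b 0.00 -z REG -c OBJ1 -g 0
#
# The script name is an assumption; the flag meanings (-a algorithm, -s scheme,
# -e epsilon, -b sigma, -z predictor type, -c objective case, -g visible GPUs)
# come from how rl_train() consumes them above.
if __name__ == '__main__':
    rl_train()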
for agent_path in ['benchmark/graph_PR_REG_OBJ1_0e+00.pkg',
                   'benchmark/graph_PR_REG_OBJ1_1e-01.pkg',
                   'benchmark/graph_PR_REG_OBJ1_1e-02.pkg',
                   'benchmark/graph_PR_REG_OBJ1_1e-03.pkg',
                   'benchmark/graph_PR_REG_OBJ1_1e-04.pkg',
                   'benchmark/graph_PR_REG_OBJ1_1e-05.pkg']:
    print(agent_path)
    agent.load_state_dict(torch.load(agent_path))

    # Benchmark environment: keys must align one-to-one with objs and ths.
    z = 'REG'
    keys = ['A2A', 'QED']
    A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
    QED = utils.Property('QED')
    objs = [A2A, QED]
    ths = [6.5, 0.0]
    env = utils.Env(objs=objs, mods=None, keys=keys, ths=ths)

    if method == 'atom':
        # Graph (atom-level) test set: reshape the flat token matrix to (n, max_len, -1).
        data = pd.read_table('data/ligand_mf_brics_test.txt')
        data = torch.from_numpy(data.values).long().view(len(data), voc.max_len, -1)
        loader = DataLoader(data, batch_size=BATCH_SIZE)
    # Tail of the sampling helper. `batch` is the number of full batches of size
    # `batch_size` and `mod` the remainder, presumably from something like
    # `batch, mod = divmod(size, batch_size)` earlier in the function (not shown);
    # iteration 0 draws the remainder, the remaining iterations draw full batches.
    for i in tqdm(range(batch + 1)):
        if i == 0:
            if mod == 0:
                continue
            tokens = netG.sample(mod)
        else:
            tokens = netG.sample(batch_size)
        smiles = [voc.decode(s) for s in tokens]
        samples.extend(smiles)
    return samples


if __name__ == '__main__':
    for z in ['REG']:
        # Construct the environment with three predictors and desirability functions.
        keys = ['A1', 'A2A', 'ERG']
        A1 = utils.Predictor('output/env/RF_%s_CHEMBL226.pkg' % z, type=z)
        A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
        ERG = utils.Predictor('output/env/RF_%s_CHEMBL240.pkg' % z, type=z)
        mod1 = utils.ClippedScore(lower_x=4, upper_x=6.5)
        mod2 = utils.ClippedScore(lower_x=9, upper_x=6.5)
        mod3 = utils.ClippedScore(lower_x=7.5, upper_x=5)
        objs = [A1, A2A, ERG]

        for case in ['OBJ1', 'OBJ3']:
            # The output paths depend on `case`, so the model -> output-file map is
            # built inside the loop.
            models = {
                'output/lstm_ligand.pkg': 'benchmark/FINE-TUNE_%s_%s.tsv' % (z, case),
                'output/lstm_chembl.pkg': 'benchmark/PRE-TRAIN_%s_%s.tsv' % (z, case),
            }
            if case == 'OBJ3':
def analysis_page():
    # Requires `from flask import request, render_template, redirect, url_for, session`
    # plus the app's own helpers (save_file, utils, analyse, error_messages), imported
    # elsewhere in the module.
    # Query-string parameters (GET) or uploaded files (POST).
    survey_file = request.args.get("survey_file")
    config = request.args.get("config")
    if (not request.method == "POST") and (not survey_file and not config):
        return redirect(url_for("main"))

    # Check whether we have everything needed to run the analysis.
    do_analysis = False
    if request.method == "GET":
        if survey_file and config:
            do_analysis = True
    elif request.method == "POST" and (request.files["file"] and request.files["config"]):
        do_analysis = True

    if do_analysis:
        # Save uploaded files (POST) or resolve the paths passed in the query string (GET).
        if request.method == "POST":
            save = save_file(survey_file=request.files["file"],
                             config_file=request.files["config"])
            directory, filename, config_filename = (
                save["Directory"], save["File"], save["Config"])
        else:
            directory, filename = os.path.split(survey_file)
            config_filename = os.path.basename(config)

        if filename.endswith(".xlsx"):
            questions = list(utils.parse_excel(os.path.join(directory, filename)).keys())
        else:
            questions = list(utils.parse_csv(os.path.join(directory, filename)).keys())
        types = utils.parse_config(os.path.join(directory, config_filename))

        # Survey file present but the config is incomplete: predict the missing types.
        if len(questions) != len(types):
            session["TEMP_FOLDER"] = directory
            predictor = utils.Predictor()
            qn_dict = {}
            for i, qn in enumerate(questions):
                if i + 1 not in types.keys():
                    datatype = predictor.predict([qn])
                    qn_dict[i + 1] = (qn, datatype[0])
                else:
                    qn_dict[i + 1] = (qn, types[i + 1])
            questions_index = [(i[0], i[1][0], i[1][1]) for i in qn_dict.items()]
            return render_template("config.html", questions=questions_index, error=None)

        # Run the analysis.
        try:
            session["ANALYSIS"] = analyse.analyse(directory, filename, config_filename)
        except ValueError:
            return render_template(
                "error.html",
                error="ValueError! Perhaps you chose a wrong category for your data",
                error_no="500",
                error_message=error_messages[500],
            )
        except Exception as e:
            return render_template(
                "error.html",
                error=f"Unknown error: {str(e)}",
                error_no="500",
                error_message=error_messages[500],
            )

        # Group the results by question type for the template.
        graphs, clouds, numerical = [], [], []
        for question, analysis in session["ANALYSIS"].items():
            if not analysis:
                continue
            if analysis[0] == "categorical":
                graphs.append([
                    question,
                    utils.pie(
                        question,
                        list(analysis[1]["Percentages"].keys()),
                        list(analysis[1]["Percentages"].values()),
                    ),
                ])
            elif analysis[0] == "openended":
                clouds.append([question, analysis[1]])
            elif analysis[0] == "numerical":
                numerical.append([question, analysis[1]])

        graphs = tuple(utils.chunk(graphs, 3))
        clouds = tuple(utils.chunk(clouds, 2))
        numerical = tuple(utils.chunk(numerical, 4))
        return render_template(
            "analysis.html",
            graphs=graphs,
            clouds=clouds,
            numerical=numerical,
            filename=filename,
            path=os.path.split(directory)[1],
        )
    elif not request.files["file"]:
        # No Excel/CSV file uploaded.
        return render_template("upload.html", error="Missing Excel/CSV file!")
    elif request.files["file"] and not request.files["config"]:
        # Excel/CSV uploaded but no config: predict a type for every question.
        save = save_file(survey_file=request.files["file"])
        directory, filename = save["Directory"], save["File"]
        session["TEMP_FOLDER"] = directory
        if filename.endswith(".xlsx"):
            questions = list(utils.parse_excel(os.path.join(directory, filename)).keys())
        else:
            questions = list(utils.parse_csv(os.path.join(directory, filename)).keys())
        predictions = utils.Predictor().predict(questions)
        questions_index = [(i + 1, question, predictions[i])
                           for i, question in enumerate(questions)]
        return render_template("config.html", questions=questions_index, error=None)
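# Minimal registration sketch. analysis_page() uses Flask's request context and
# session, so it has to be attached to an app with a secret key set. The app object,
# route path and secret key below are assumptions; only the GET/POST handling and the
# use of request/session/render_template come from the view above.
from flask import Flask

app = Flask(__name__)
app.secret_key = "change-me"  # session["ANALYSIS"] / session["TEMP_FOLDER"] need this

# The view accepts GET (paths in the query string) and POST (file uploads).
app.add_url_rule("/analysis", view_func=analysis_page, methods=["GET", "POST"])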