Code example #1
0
def train_ex():
    """Run reinforcement-learning fine-tuning for the graph-based generator.

    NOTE(review): relies on module-level names defined elsewhere in the file
    (agent, voc, params, OPT, BATCH_SIZE, root, alg, z, train_loader,
    valid_loader) — confirm they are initialized before this is called.
    """
    # Restore pre-trained agent weights onto the configured device.
    agent.load_state_dict(torch.load(params['pr_path'] + '.pkg', map_location=utils.dev))

    # The fine-tuned model serves as the mutation prior for exploration.
    prior = GraphModel(voc)
    prior.load_state_dict(torch.load(params['ft_path'] + '.pkg', map_location=utils.dev))

    evolver = GraphExplorer(agent, mutate=prior)

    # Exploration hyper-parameters, overridable via command-line options.
    evolver.batch_size = BATCH_SIZE
    evolver.epsilon = float(OPT.get('-e', '1e-2'))  # exploration rate
    evolver.sigma = float(OPT.get('-b', '0.00'))
    evolver.scheme = OPT.get('-s', 'WS')            # reward scheme ('WS' = weighted sum)
    evolver.repeat = 1

    keys = ['A2A', 'QED']
    A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
    QED = utils.Property('QED')

    # Choose the desirability functions (score clipping per objective).
    objs = [A2A, QED]

    # Clipping ranges and thresholds differ between the weighted-sum scheme
    # and the alternative scheme.
    if evolver.scheme == 'WS':
        mod1 = utils.ClippedScore(lower_x=3, upper_x=10)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=1.0)
        ths = [0.5, 0]
    else:
        mod1 = utils.ClippedScore(lower_x=3, upper_x=6.5)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=1.0)
        ths = [0.99, 0]
    mods = [mod1, mod2]
    evolver.env = utils.Env(objs=objs, mods=mods, keys=keys, ths=ths)

    # Output prefix encodes algorithm, scheme and epsilon for this run.
    evolver.out = root + '/%s_%s_%.0e' % (alg, evolver.scheme, evolver.epsilon)
    evolver.fit(train_loader, test_loader=valid_loader)
Code example #2
0
File: train_smiles.py  Project: XuhanLiu/DrugEx
def rl_train():
    """Run reinforcement-learning fine-tuning for the SMILES (GPT-2) generator.

    Command-line options (via getopt):
        -a  algorithm tag used in output paths (default 'smile')
        -e  exploration rate epsilon (default '1e-2')
        -b  sigma term (default '0.00')
        -g  CUDA_VISIBLE_DEVICES value (default "0,1,2,3")
        -c  case label, e.g. OBJ1/OBJ3 (default 'OBJ1')
        -s  reward scheme (default 'WS')
        -z  predictor type, e.g. REG (default 'REG')

    NOTE(review): relies on module-level names defined elsewhere in the file
    (params, BATCH_SIZE, data_loader, test_loader, GPT2Model, SmilesExplorer,
    copy2, utils) — confirm they are in scope before this is called.
    """
    opts, _ = getopt.getopt(sys.argv[1:], "a:e:b:g:c:s:z:")
    OPT = dict(opts)
    # dict.get is the idiomatic form of "value if key present else default"
    # and matches how -e/-b/-s are read below.
    case = OPT.get('-c', 'OBJ1')
    z = OPT.get('-z', 'REG')
    alg = OPT.get('-a', 'smile')
    os.environ["CUDA_VISIBLE_DEVICES"] = OPT.get('-g', "0,1,2,3")

    voc = utils.VocSmiles(init_from_file="data/chembl_voc.txt", max_len=100)
    # Agent starts from the pre-trained weights ...
    agent = GPT2Model(voc, n_layer=12)
    agent.load_state_dict(
        torch.load(params['pr_path'] + '.pkg', map_location=utils.dev))

    # ... while the fine-tuned model serves as the mutation prior.
    prior = GPT2Model(voc, n_layer=12)
    prior.load_state_dict(
        torch.load(params['ft_path'] + '.pkg', map_location=utils.dev))

    evolver = SmilesExplorer(agent, mutate=prior)

    # Exploration hyper-parameters, overridable via command-line options.
    evolver.batch_size = BATCH_SIZE
    evolver.epsilon = float(OPT.get('-e', '1e-2'))
    evolver.sigma = float(OPT.get('-b', '0.00'))
    evolver.scheme = OPT.get('-s', 'WS')
    evolver.repeat = 1

    keys = ['A2A', 'QED']
    A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
    QED = utils.Property('QED')

    # Choose the desirability functions (score clipping per objective).
    objs = [A2A, QED]

    if evolver.scheme == 'WS':
        mod1 = utils.ClippedScore(lower_x=3, upper_x=10)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=1)
        ths = [0.5, 0]
    else:
        mod1 = utils.ClippedScore(lower_x=3, upper_x=6.5)
        mod2 = utils.ClippedScore(lower_x=0, upper_x=0.5)
        ths = [0.99, 0]
    # FIX: the original `[mod1, mod2] if case == 'OBJ3' else [mod1, mod2]`
    # had identical branches — the conditional was dead code.
    mods = [mod1, mod2]
    evolver.env = utils.Env(objs=objs, mods=mods, keys=keys, ths=ths)

    # Timestamped output directory keeps separate runs from colliding.
    root = 'output/%s_%s' % (alg,
                             time.strftime('%y%m%d_%H%M%S', time.localtime()))

    os.mkdir(root)
    # Snapshot the training scripts next to the results for reproducibility.
    copy2(alg + '_ex.py', root)
    copy2(alg + '.py', root)

    # Output prefix encodes algorithm, scheme, predictor type, case, epsilon.
    evolver.out = root + '/%s_%s_%s_%s_%.0e' % (alg, evolver.scheme, z, case,
                                                evolver.epsilon)
    evolver.fit(data_loader, test_loader=test_loader)
Code example #3
0
    # Benchmark loop: evaluate a series of graph agents trained with
    # different exploration rates (the epsilon encoded in each file name).
    # NOTE(review): `agent`, `method`, `voc` and BATCH_SIZE come from the
    # enclosing (not visible) scope — confirm against the full file.
    for agent_path in [
            'benchmark/graph_PR_REG_OBJ1_0e+00.pkg',
            'benchmark/graph_PR_REG_OBJ1_1e-01.pkg',
            'benchmark/graph_PR_REG_OBJ1_1e-02.pkg',
            'benchmark/graph_PR_REG_OBJ1_1e-03.pkg',
            'benchmark/graph_PR_REG_OBJ1_1e-04.pkg',
            'benchmark/graph_PR_REG_OBJ1_1e-05.pkg'
    ]:
        print(agent_path)
        # Load the checkpointed weights for this benchmark run.
        agent.load_state_dict(torch.load(agent_path))

        z = 'REG'  # predictor type tag used in the model file name
        keys = ['A2A']
        A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
        QED = utils.Property('QED')

        # Choose the desirability function
        objs = [A2A, QED]

        ths = [6.5, 0.0]

        # No score modifiers here (mods=None) — raw predictor outputs are
        # compared against the thresholds directly.
        env = utils.Env(objs=objs, mods=None, keys=keys, ths=ths)
        if method in ['atom']:
            data = pd.read_table('data/ligand_mf_brics_test.txt')
            # Reshape the flat token table to (n_samples, max_len, -1).
            data = torch.from_numpy(data.values).long().view(
                len(data), voc.max_len, -1)
            loader = DataLoader(data, batch_size=BATCH_SIZE)
Code example #4
0
File: sampler.py  Project: DayDayUpDeng/DrugEx
    # Sampling loop: draw `batch + 1` rounds of sequences from the generator
    # and decode them to SMILES strings.
    # NOTE(review): `batch`, `mod`, `batch_size`, `netG`, `voc` and `samples`
    # come from the enclosing (not visible) scope — confirm semantics there.
    for i in tqdm(range(batch + 1)):
        if i == 0:
            # First round: skipped entirely when mod == 0, and samples
            # `batch` (not `batch_size`) sequences otherwise — presumably a
            # remainder round; verify against the caller.
            if mod == 0: continue
            tokens = netG.sample(batch)
        else:
            tokens = netG.sample(batch_size)
        # Decode token tensors back into SMILES strings.
        smiles = [voc.decode(s) for s in tokens]
        samples.extend(smiles)
    return samples


if __name__ == '__main__':
    for z in ['REG']:
        # Construct the environment with three predictors and desirability functions
        keys = ['A1', 'A2A', 'ERG']
        A1 = utils.Predictor('output/env/RF_%s_CHEMBL226.pkg' % z, type=z)
        A2A = utils.Predictor('output/env/RF_%s_CHEMBL251.pkg' % z, type=z)
        ERG = utils.Predictor('output/env/RF_%s_CHEMBL240.pkg' % z, type=z)
        # Clipped desirability transforms; mod2/mod3 have lower_x > upper_x,
        # presumably giving a decreasing score (penalizing high predictions)
        # — verify against utils.ClippedScore.
        mod1 = utils.ClippedScore(lower_x=4, upper_x=6.5)
        mod2 = utils.ClippedScore(lower_x=9, upper_x=6.5)
        mod3 = utils.ClippedScore(lower_x=7.5, upper_x=5)
        objs = [A1, A2A, ERG]

        # Map each model checkpoint to the TSV file its samples go to.
        # NOTE(review): `case` is interpolated here but the `for case` loop
        # only starts below — this works only if `case` is already bound
        # earlier in the file; confirm against the full source.
        models = {
            'output/lstm_ligand.pkg':
            'benchmark/FINE-TUNE_%s_%s.tsv' % (z, case),
            'output/lstm_chembl.pkg':
            'benchmark/PRE-TRAIN_%s_%s.tsv' % (z, case)
        }
        for case in ['OBJ1', 'OBJ3']:
            if case == 'OBJ3':
Code example #5
0
def analysis_page():
    """Render survey-analysis results, or the config/upload fallback pages.

    Accepts either:
      * GET with ``survey_file`` and ``config`` query parameters pointing to
        previously saved files, or
      * POST with ``file`` (Excel/CSV survey) and ``config`` uploads.

    Returns:
        The analysis page on success; the config page when the config is
        missing or incomplete; the upload page when no survey file was sent;
        an error page when analysis raises.
    """
    survey_file, config = request.args.get("survey_file"), request.args.get(
        "config")
    # A plain GET with neither query parameter has nothing to analyse.
    if (not request.method == "POST") and (not survey_file and not config):
        return redirect(url_for("main"))

    # Decide whether both required inputs (survey + config) are available.
    do_analysis = False
    if request.method == "GET":
        if survey_file and config:
            do_analysis = True
    elif request.method == "POST" and (request.files["file"]
                                       and request.files["config"]):
        do_analysis = True

    # Do analysis
    if do_analysis:
        # Resolve directory/filenames, saving the uploads first on POST.
        if request.method == "POST":
            save = save_file(survey_file=request.files["file"],
                             config_file=request.files["config"])
            directory, filename, config_filename = (
                save["Directory"],
                save["File"],
                save["Config"],
            )
        else:
            directory, filename = os.path.split(survey_file)
            config_filename = os.path.basename(config)

        questions = _load_questions(directory, filename)
        types = utils.parse_config(os.path.join(directory, config_filename))

        # Config does not cover every question: predict the missing
        # datatypes and ask the user to confirm them on the config page.
        if len(questions) != len(types):
            session["TEMP_FOLDER"] = directory
            predictor = utils.Predictor()
            qn_dict = {}
            for i, qn in enumerate(questions):
                if i + 1 not in types.keys():
                    datatype = predictor.predict([qn])
                    qn_dict[i + 1] = (qn, datatype[0])
                else:
                    qn_dict[i + 1] = (qn, types[i + 1])
            # Flatten {index: (question, datatype)} into (index, question,
            # datatype) triples for the template.
            questions_index = [(idx, qn, datatype)
                               for idx, (qn, datatype) in qn_dict.items()]

            return render_template("config.html",
                                   questions=questions_index,
                                   error=None)

        # Run the analysis proper; map failures to a friendly error page.
        try:
            session["ANALYSIS"] = analyse.analyse(directory, filename,
                                                  config_filename)
        except ValueError:
            # FIX: the bound exception variable was unused here.
            return render_template(
                "error.html",
                error=
                "ValueError! Perhaps you chose a wrong category for your data",
                error_no="500",
                error_message=error_messages[500],
            )
        except Exception as e:
            return render_template(
                "error.html",
                error=f"Unknown error: {str(e)}",
                error_no="500",
                error_message=error_messages[500],
            )

        # Bucket each analysed question by its datatype for rendering.
        graphs, clouds, numerical = [], [], []
        for question, analysis in session["ANALYSIS"].items():
            if analysis:
                if analysis[0] == "categorical":
                    graphs.append([
                        question,
                        utils.pie(
                            question,
                            # list(...) replaces the redundant identity
                            # comprehensions over .keys()/.values().
                            list(analysis[1]["Percentages"].keys()),
                            list(analysis[1]["Percentages"].values()),
                        ),
                    ])
                elif analysis[0] == "openended":
                    clouds.append([question, analysis[1]])
                elif analysis[0] == "numerical":
                    numerical.append([question, analysis[1]])

        # Chunk into fixed-width rows so the template can lay out columns.
        graphs = tuple(utils.chunk(graphs, 3))
        clouds = tuple(utils.chunk(clouds, 2))
        numerical = tuple(utils.chunk(numerical, 4))

        return render_template(
            "analysis.html",
            graphs=graphs,
            clouds=clouds,
            numerical=numerical,
            filename=filename,
            path=os.path.split(directory)[1],
        )
    elif not request.files["file"]:  # No Excel/CSV file
        return render_template("upload.html", error="Missing Excel/CSV file!")

    elif request.files[
            "file"] and not request.files["config"]:  # Excel but no config
        save = save_file(survey_file=request.files["file"])
        directory, filename = save["Directory"], save["File"]
        session["TEMP_FOLDER"] = directory
        questions = _load_questions(directory, filename)
        predictions = utils.Predictor().predict(questions)
        questions_index = [(i + 1, question, predictions[i])
                           for i, question in enumerate(questions)]

        return render_template("config.html",
                               questions=questions_index,
                               error=None)


def _load_questions(directory, filename):
    """Return survey question names from an .xlsx or CSV file.

    Extracted from analysis_page, where this Excel-vs-CSV dispatch was
    duplicated verbatim in two branches.
    """
    path = os.path.join(directory, filename)
    if filename.endswith(".xlsx"):
        return list(utils.parse_excel(path).keys())
    return list(utils.parse_csv(path).keys())