Beispiel #1
0
def main():
    """Run reconstruction and generation with the QM9 grammar model.

    Output file names are built from the part of ``GRAMMAR_WEIGHTS`` that
    precedes its first ``.``. The reconstruction results and the generation
    results are each written to their own file.
    """
    prefix = GRAMMAR_WEIGHTS.split(".")[0]
    decoded_file, priors_file, generation_file = (
        prefix + suffix
        for suffix in ("_decRes.txt", "_priorsRes.txt", "_generationRes.txt")
    )
    grammar_model = molecule_vae.Qm9GrammarModel(GRAMMAR_WEIGHTS)

    # Only the first 5000 entries returned by readStr_qm9() are evaluated.
    XTE = readStr_qm9()[0:5000]

    # NOTE: remember to comment/uncomment the matching line in the
    # molecule_vae file before running this step.
    save_decoded_results(XTE, reconstruction(grammar_model, XTE), decoded_file)

    # Prior step is currently disabled (priors_file is only used here):
    # decoded_priors = prior(grammar_model)
    # save_decoded_priors(decoded_priors, priors_file)

    save_decoded_priors(generation(grammar_model), generation_file)
def main():
    """Reconstruct part of a SMILES test set and save the decoded results.

    The dataset is selected by ``cmd_args.smiles_file`` (``'qm9'`` or
    ``'zinc'``). Its first ``nb_smiles`` entries are passed to
    ``reconstruct`` together with a freshly built ``AttMolProxy`` model, and
    the outcome is written to ``<cmd_args.save_dir>/decoded_results.txt``.

    Raises:
        ValueError: if ``cmd_args.smiles_file`` is neither ``'qm9'`` nor
            ``'zinc'``.
    """
    from att_model_proxy import AttMolProxy as ProxyModel
    from att_model_proxy import cmd_args

    # Model used to compute the decode results.
    model = ProxyModel()
    # Destination of the results file.
    decoded_file = cmd_args.save_dir + '/decoded_results.txt'

    # Read the SMILES test set.
    if cmd_args.smiles_file == 'qm9':
        smiles_list = readStr_qm9()
    elif cmd_args.smiles_file == 'zinc':
        smiles_list = read_zinc()
    else:
        # Without this branch smiles_list would be unbound below and the
        # script would die with an unhelpful UnboundLocalError.
        raise ValueError(
            "unsupported smiles_file %r; expected 'qm9' or 'zinc'"
            % (cmd_args.smiles_file,)
        )

    XTE = smiles_list[0:nb_smiles]

    decoded_result = np.array(reconstruct(model, XTE))
    save_decoded_results(XTE, decoded_result, decoded_file)
def main():
    torch.manual_seed(0)
    lg = rdkit.RDLogger.logger()
    lg.setLevel(rdkit.RDLogger.CRITICAL)

    parser = OptionParser()
    parser.add_option("-t", "--test", dest="test_path")
    parser.add_option("-v", "--vocab", dest="vocab_path")
    parser.add_option("-m", "--model", dest="model_path")
    parser.add_option("-w", "--hidden", dest="hidden_size", default=200)
    parser.add_option("-l", "--latent", dest="latent_size", default=56)
    parser.add_option("-d", "--depth", dest="depth", default=3)
    opts, args = parser.parse_args()

    vocab = [x.strip("\r\n ") for x in open(opts.vocab_path)]
    vocab = Vocab(vocab)

    hidden_size = int(opts.hidden_size)
    latent_size = int(opts.latent_size)
    depth = int(opts.depth)

    model = JTNNVAE(vocab, hidden_size, latent_size, depth)
    model.load_state_dict(torch.load(opts.model_path))
    model = model.cuda()

    dataset_name = opts.test_path
    result_file = dataset_name + "_decoded_results.txt"
    priors_file = dataset_name + "_decoded_priors.txt"
    generation_fie = dataset_name + "_generation.txt"

    # read dataset
    if dataset_name == "zinc":
        XTE = read_zinc()
    else:
        D = readStr_qm9()
        # fix problem about molecule with '.' inside
        XTE = []
        for mol in D:
            if "." not in mol:
                XTE.append(mol)

    # reconstruction
    XTE = XTE[0:5000]
    XTE = filter(lambda x: len(x) > 1,
                 XTE)  #needed for removing smiles with only a char.
    decoded_result = reconstruction(model, XTE, 20, 1)
    save_decoded_results(XTE, decoded_result, result_file)

    # prior
    # decoded_priors_witherrors = model.sample_prior_eval(True, 1000, 10)
    # decoded_priors = []
    # for i in decoded_priors_witherrors:
    #     decoded_priors.append(sanitize(i))
    # save_decoded_priors(decoded_priors, priors_file)

    # generation
    generation_witherrors = model.sample_prior_eval(True, 20000, 1)
    generation = []
    for i in generation_witherrors:
        generation.append(sanitize(i))
    save_decoded_priors(generation, generation_fie)