Example #1
0
def sample():
    """Generate SMILES strings from a model restored from LBANN weights.

    All settings are read from the module-level ``model_config`` object:
    the vocabulary path, the model name, the LBANN weights location
    (directory, prefix, epoch and step), the number of samples, the batch
    size, the maximum length and the output CSV path.

    Returns:
        pandas.DataFrame: the generated samples in a single ``"SMILES"``
        column. The same frame is also written to ``model_config.gen_save``.

    Raises:
        FileNotFoundError: if ``model_config.lbann_weights_dir`` does not
            exist.
    """
    MODELS = ModelsStorage()
    model_vocab = torch.load(model_config.vocab_path)
    model = MODELS.get_model_class(model_config.model)(model_vocab, model_config)

    # Load the model weights. os.path.exists() returns a bool, so comparing
    # its result with None can never fail; test the bool itself and raise a
    # real exception so the check also survives `python -O`.
    if not os.path.exists(model_config.lbann_weights_dir):
        raise FileNotFoundError(
            f"LBANN weights directory not found: {model_config.lbann_weights_dir}"
        )

    weights_prefix = (
        f"{model_config.lbann_weights_dir}/{model_config.weight_prefix}"
        f".epoch.{model_config.lbann_load_epoch}"
        f".step.{model_config.lbann_load_step}"
    )
    model.load_lbann_weights(
        weights_prefix,
    )

    # TODO: here we should try to wrap model in a dataparallel layer or
    # something?
    model.cuda()
    model.eval()

    samples = []
    n = model_config.n_samples
    print("Generating Samples")
    with tqdm(total=model_config.n_samples, desc="Generating samples") as T:
        # Ask for at most one batch at a time until the quota is used up.
        # NOTE(review): if model.sample() ever returns an empty batch, `n`
        # stops decreasing and this loop never ends -- confirm it cannot.
        while n > 0:
            current_samples = model.sample(
                min(n, model_config.n_batch), model_config.max_len
            )
            samples.extend(current_samples)

            n -= len(current_samples)
            T.update(len(current_samples))

    samples = pd.DataFrame(samples, columns=["SMILES"])
    print("Save generated samples to ", model_config.gen_save)
    samples.to_csv(model_config.gen_save, index=False)
    return samples
Example #2
0
# df['dec_fp'] = df['dec'].apply(lambda x: get_mol_features(Chem.MolFromSmiles(x)))
# Work on a random 10% of the rows, renumbered with a fresh default index.
df = df.sample(frac=0.1)
df = df.reset_index(drop=True)
# Separate copy of the subsample; the 'simi' column is added to this one.
df2 = df.copy()
# =============================================================================
# 
# =============================================================================
import torch
from moses.models_storage import ModelsStorage
from moses.latentgan.model import load_model
def _encode_smiles(smiles):
    """Pass one SMILES string through ``smi2vec`` and the heteroencoder."""
    return model.heteroencoder.encode(smi2vec([smiles]))


def _unit_vector(vec):
    """Divide ``vec`` by the square root of the sum of its squared entries."""
    # `vec ** 2` needs an array-like that supports `**` (presumably an
    # ndarray -- confirm against what heteroencoder.encode returns).
    return np.array(vec) / (np.sum(vec ** 2) ** 0.5)


MODELS = ModelsStorage()

# Restore the saved config, vocabulary and weights, then build the model.
model_config = torch.load('../temp')
model_vocab = torch.load('../vocab')
model_state = torch.load('../t_020.pt')
latentgan_cls = MODELS.get_model_class('latentgan')
model = latentgan_cls(model_vocab, model_config)
model.load_state_dict(model_state)
model = model.cuda()
model.eval()

# Flag set by hand; presumably tells the model its heteroencoder is already
# available -- confirm.
# NOTE(review): the first value returned by load_model() is discarded and
# model.heteroencoder is never assigned in this fragment; verify it is set
# elsewhere before the encode calls below.
model.model_loaded = True
_, smi2vec = load_model()

# Map each distinct 'act' SMILES to its encoding, then normalise each one.
act2fp = set(df['act'])
act2fp = {smi: _encode_smiles(smi) for smi in act2fp}
act2fp = {smi: _unit_vector(code) for smi, code in act2fp.items()}

# Same two steps for every 'dec' SMILES, stored row by row in 'simi'.
df2['simi'] = df2['dec'].apply(_encode_smiles)
df2['simi'] = df2['simi'].apply(_unit_vector)

# Replace 'simi' with [act, normalised dec encoding] pairs, one per row.
act_simi_pairs = df2[['act', 'simi']].values
df2['simi'] = act_simi_pairs.tolist()