def sample():
    """Generate SMILES samples from a model restored from LBANN weights.

    All settings are read from the module-level ``model_config``. The
    function builds the model class named by ``model_config.model`` with the
    vocabulary loaded from ``model_config.vocab_path``, loads the LBANN
    weights, moves the model to the GPU in eval mode, and calls
    ``model.sample`` in batches of at most ``model_config.n_batch`` until at
    least ``model_config.n_samples`` samples have been collected. The samples
    are written to ``model_config.gen_save`` as a CSV without the index.

    Returns:
        pandas.DataFrame: the collected samples in a single "SMILES" column.

    Raises:
        FileNotFoundError: if ``model_config.lbann_weights_dir`` does not
            exist.
    """
    MODELS = ModelsStorage()
    model_vocab = torch.load(model_config.vocab_path)
    model = MODELS.get_model_class(model_config.model)(model_vocab, model_config)

    # Load the model weights. The previous check,
    # ``assert os.path.exists(...) is not None``, could never fail because
    # ``os.path.exists`` returns a bool; test the bool itself and raise
    # explicitly so the check also survives ``python -O``.
    if not os.path.exists(model_config.lbann_weights_dir):
        raise FileNotFoundError(
            f"LBANN weights directory not found: {model_config.lbann_weights_dir!r}"
        )

    weights_prefix = (
        f"{model_config.lbann_weights_dir}/{model_config.weight_prefix}"
        f".epoch.{model_config.lbann_load_epoch}"
        f".step.{model_config.lbann_load_step}"
    )
    model.load_lbann_weights(
        weights_prefix,
    )

    # TODO: here we should try to wrap model in a dataparallel layer or something?
    model.cuda()
    model.eval()

    samples = []
    n = model_config.n_samples  # number of samples still to collect
    print("Generating Samples")
    with tqdm(total=model_config.n_samples, desc="Generating samples") as T:
        while n > 0:
            # Never request more than is still missing or than one batch allows.
            current_samples = model.sample(
                min(n, model_config.n_batch), model_config.max_len
            )
            samples.extend(current_samples)

            n -= len(current_samples)
            T.update(len(current_samples))

    samples = pd.DataFrame(samples, columns=["SMILES"])
    print("Save generated samples to ", model_config.gen_save)
    samples.to_csv(model_config.gen_save, index=False)
    return samples
# Disabled fingerprint column, kept for reference:
# df['dec_fp'] = df['dec'].apply(lambda x: get_mol_features(Chem.MolFromSmiles(x)))

# Keep a random 10% of the rows, renumbered from zero; df2 is a copy that
# receives the new 'simi' column below.
df = df.sample(frac=0.1).reset_index(drop=True)
df2 = df.copy()

# =============================================================================
# Restore the 'latentgan' model and encode the 'act' / 'dec' columns
# =============================================================================
import torch
from moses.models_storage import ModelsStorage
from moses.latentgan.model import load_model

MODELS = ModelsStorage()
model_config = torch.load('../temp')
model_vocab = torch.load('../vocab')
model_state = torch.load('../t_020.pt')

model = MODELS.get_model_class('latentgan')(model_vocab, model_config)
model.load_state_dict(model_state)
model = model.cuda()
model.eval()
model.model_loaded = True

_, smi2vec = load_model()


def _encode_one(text):
    """Pass a single-item list through ``smi2vec`` and the heteroencoder."""
    return model.heteroencoder.encode(smi2vec([text]))


def _scale_by_root_sum_squares(vec):
    """Divide *vec* by the square root of the sum of its squared entries."""
    return np.array(vec) / (np.sum(vec ** 2) ** 0.5)


# Encode each distinct 'act' value once, then rescale every encoding.
# (presumably the 'act' values are SMILES strings -- confirm against the data)
act2fp = set(df['act'])
act2fp = {smi: _encode_one(smi) for smi in act2fp}
act2fp = {key: _scale_by_root_sum_squares(item) for key, item in act2fp.items()}

# The same two steps, applied row by row to the 'dec' column.
df2['simi'] = df2['dec'].apply(_encode_one)
df2['simi'] = df2['simi'].apply(_scale_by_root_sum_squares)

# Each 'simi' cell becomes a two-item list: [act value, rescaled dec encoding].
df2['simi'] = df2[['act', 'simi']].values.tolist()