def inference_guided(self, latent, embedding, max_len=None):
    """Greedily decode SMILES strings, accepting only syntactically viable tokens.

    At every step the decoder's tokens are tried from most to least likely
    for each unfinished sample. A regular token is accepted only if appending
    it leaves a string that ``ps.ParseSmiles(..., partial=True)`` accepts; the
    padding token is accepted (and ends the sample) only if
    ``Chem.MolFromSmiles`` returns a molecule for the string built so far.
    A sample for which no token is acceptable is abandoned with whatever
    string it had accumulated.

    Args:
        latent: Tensor whose dim 0 is the batch and whose dim 1 must equal
            ``self.latent_size``. The decoding loop runs on ``latent.device``.
        embedding: Callable mapping a ``(batch, 1)`` long tensor of token ids
            to the RNN input. It must live on the same device as ``latent``.
        max_len: Maximum number of decoding steps; defaults to
            ``self.max_len`` when ``None``.

    Returns:
        A ``(smiles_pred, index_pred)`` tuple. ``smiles_pred`` is a list of
        ``batch`` strings. ``index_pred`` is a CPU long tensor of shape
        ``(batch, steps)`` holding the token fed forward at each step, or an
        empty ``LongTensor`` when no step was run. Rows that are already
        finished or abandoned keep receiving the raw argmax token.

    Raises:
        AssertionError: If ``latent.size(1) != self.latent_size``.
    """
    if max_len is None:
        max_len = self.max_len
    assert latent.size(1) == self.latent_size, 'latent size error!'

    batch_size = latent.size(0)
    # Follow the input's device instead of hard-coding CUDA so the same code
    # runs on CPU and on any GPU index.
    device = latent.device

    # latent -> initial RNN state of shape (hidden_factor, batch, hidden_size)
    hidden = self.latent2hidden(latent)
    hidden = hidden.view(batch_size, self.hidden_factor, self.hidden_size)
    hidden = torch.tanh(hidden.permute(1, 0, 2).contiguous())

    # Every sample starts from the start-of-sequence token.
    input_sequence = torch.full(
        (batch_size, 1), int(self.sos_idx), dtype=torch.long)

    step_tokens = []  # one (batch, 1) CPU long tensor per decoding step
    smiles_pred = [""] * batch_size
    smiles_state = [0] * batch_size  # -1: failed, 1: succeeded, 0: todo

    for _ in range(max_len):
        # Run the decoder RNN for a single step.
        input_embedding = embedding(input_sequence.to(device))
        output, hidden = self.rnn(input_embedding, hidden)
        output = self.outputs_dropout(output)
        logits = self.outputs2vocab(output).cpu()

        # Default next input is the plain argmax; active rows are overridden
        # below with the best token that passes the validity checks.
        input_sequence = torch.argmax(logits, dim=-1)
        _, index = torch.sort(logits, dim=-1, descending=True)
        # Plain Python ints are cheaper to iterate than 0-dim tensors and are
        # safe to use as list indices or dict keys for the vocabulary.
        ranked = index[:, 0, :].tolist()

        for i in range(batch_size):
            if smiles_state[i] != 0:
                continue  # this sample already finished or failed

            # Try tokens in order of decreasing score.
            for idx_cur in ranked[i]:
                if idx_cur == self.padding_idx:
                    # Only allow termination when the string built so far is
                    # accepted as a complete molecule.
                    if Chem.MolFromSmiles(smiles_pred[i]) is not None:
                        input_sequence[i] = idx_cur
                        smiles_state[i] = 1
                        break
                elif idx_cur > 2:
                    # NOTE(review): ids 0-2 are presumably reserved special
                    # tokens and therefore never emitted; confirm against the
                    # vocabulary definition.
                    smi = smiles_pred[i] + self.vocab.itos[idx_cur]
                    try:
                        ps.ParseSmiles(smi, partial=True)
                    except ps.Error:
                        continue  # not a viable prefix, try the next token
                    input_sequence[i] = idx_cur
                    smiles_pred[i] = smi
                    break
            else:
                # No token could extend or terminate this sample.
                smiles_state[i] = -1

        step_tokens.append(input_sequence)
        if 0 not in smiles_state:
            break

    if step_tokens:
        index_pred = torch.cat(step_tokens, dim=1)
    else:
        index_pred = torch.LongTensor()
    return smiles_pred, index_pred
def validate_smiles(smiles: str) -> None:
    """Validate that the `partialsmiles` library can parse a SMILES string.

    The string is parsed as a complete (non-partial) SMILES. This validator
    ignores PartialSmiles' rejection of uncommon valences like `[S]` but
    raises an error when it hits chirality described with `@@(`. For example,
    `[C@H](...` is valid but `C@@(...` is not.

    Only `ps.ValenceError` and `ps.SMILESSyntaxError` are handled here; any
    other exception raised by the parser propagates to the caller unchanged.

    Args:
        smiles: The SMILES string.

    Raises:
        ValidationError: If the SMILES string cannot be interpreted according
            to the SMILES dialect used by `partialsmiles`. The message embeds
            the parser's own error text, and the original
            `ps.SMILESSyntaxError` is attached as the exception's cause.
    """
    try:
        ps.ParseSmiles(smiles, partial=False)
    except ps.ValenceError:
        # Deliberately tolerated: unusual valences are not treated as invalid.
        pass
    except ps.SMILESSyntaxError as e:
        # Chain explicitly so the parser's traceback stays attached.
        raise ValidationError(
            f"Structure is not in SMILES format: {e}") from e
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the SMILES string.
    if len(sys.argv) != 2:
        help_text()
        sys.exit(1)

    smi = sys.argv[1]
    print("Input: %s" % smi)

    # Pass 1: treat the input as a complete SMILES string.
    print("\n1. Parse as full SMILES:\n")
    try:
        ps.ParseSmiles(smi)
    except ps.Error as exc:
        # Show the parser's message between markers and record the failure.
        print("FAIL\n<<<\n" + str(exc) + "\n>>>")
        full_status = FAIL
    else:
        full_status = PASS
        print("OK")

    # Pass 2: treat the input as a prefix of a SMILES string.
    print("\n2. Parse as partial SMILES:\n")
    partial_status = PASS
    try:
        ps.ParseSmiles(smi, partial=True)
    except ps.Error as exc:
        print("FAIL\n<<<\n" + str(exc) + "\n>>>")
        partial_status = FAIL
for j in range(1, len(smiB) - 1): yield smiA[:i] + smiB[j:] yield smiB[:j] + smiA[i:] if __name__ == "__main__": fname = sys.argv[1] for smi in mutants(sys.argv[1]): try: # Does it fail for a substring, but then pass for a # longer string? (Or the whole treated as a complete string?) substring_failed = False for i in range(1, len(smi) + 1): sub = smi[:i] try: ps.ParseSmiles(sub, partial=True) except ps.Error: substring_failed = True else: if substring_failed: print("ERROR!") print(smi) continue try: ps.ParseSmiles(smi, partial=False) except ps.Error: pass else: if substring_failed: print("ERROR!")