Beispiel #1
0
    def inference_guided(self, latent, embedding, max_len=None):
        """Greedily decode SMILES strings from latent vectors with syntax guidance.

        The decoder RNN is run one step at a time. For every sequence that is
        still in progress, candidate tokens are tried in order of descending
        logit:

        * the padding token is accepted only if ``Chem.MolFromSmiles`` returns
          a molecule for the string built so far, which marks the sequence as
          finished successfully;
        * any other token with index > 2 is accepted only if the extended
          string still parses with ``ps.ParseSmiles(..., partial=True)``;
        * remaining indices (<= 2 and not the padding index) are never chosen
          (presumably special tokens -- confirm against the vocabulary).

        If no candidate is accepted the sequence is marked as failed and its
        string is left as it was.

        Args:
            latent: Tensor whose dim 0 is the batch and whose dim 1 must equal
                ``self.latent_size``.
            embedding: Callable applied to the current token-index tensor to
                produce the RNN input (e.g. an embedding layer).
            max_len: Maximum number of decoding steps. Defaults to
                ``self.max_len`` when ``None``.

        Returns:
            A tuple ``(smiles_pred, index_pred)``. ``smiles_pred`` is a list
            with one decoded string per batch element. ``index_pred`` is a
            tensor holding, for each step, the token indices that are fed to
            the next step, concatenated along dim 1. For sequences that are
            already finished or failed this is the plain argmax token.

        Raises:
            AssertionError: If ``latent.size(1)`` differs from
                ``self.latent_size``.
        """
        if max_len is None:
            max_len = self.max_len
        assert latent.size(1) == self.latent_size, 'latent size error!'
        batch_size = latent.size(0)

        # Project the latent vector to the initial RNN hidden state, laid out
        # as (hidden_factor, batch, hidden_size).
        hidden = self.latent2hidden(latent)
        hidden = hidden.view(batch_size, self.hidden_factor, self.hidden_size)
        hidden = hidden.permute(1, 0, 2).contiguous()
        hidden = torch.tanh(hidden)

        # Every sequence starts from the start-of-sequence token.
        input_sequence = torch.Tensor(batch_size).fill_(
            self.sos_idx).unsqueeze(1).long()
        index_pred = torch.LongTensor()
        smiles_pred = ["" for _ in range(batch_size)]
        # Per-sequence status: -1 failed, 1 succeeded, 0 still decoding.
        smiles_state = [0 for _ in range(batch_size)]

        for _ in range(max_len):
            # NOTE(review): the device is hard-coded; this requires CUDA.
            input_sequence = input_sequence.cuda()
            input_embedding = embedding(input_sequence)

            # decoder rnn run once
            output, hidden = self.rnn(input_embedding, hidden)
            output = self.outputs_dropout(output)
            logits = self.outputs2vocab(output).cpu()

            # Default next input is the argmax; entries of sequences that are
            # still decoding are overwritten below with the guided choice.
            input_sequence = torch.argmax(logits, dim=-1)
            _, index = torch.sort(logits, dim=-1, descending=True)
            vocab_size = logits.size(-1)

            for i in range(batch_size):
                # Skip sequences that already finished or failed.
                if smiles_state[i] != 0:
                    continue

                ranked = index[i][0]
                # Try all tokens in descending order of likelihood.
                for j in range(vocab_size):
                    idx_cur = ranked[j]
                    if idx_cur == self.padding_idx:
                        # Only stop here if the string is a valid molecule.
                        if Chem.MolFromSmiles(smiles_pred[i]) is not None:
                            input_sequence[i] = idx_cur
                            smiles_state[i] = 1
                            break
                    elif idx_cur > 2:
                        smi = smiles_pred[i] + self.vocab.itos[idx_cur]
                        try:
                            ps.ParseSmiles(smi, partial=True)
                        except ps.Error:
                            # Token would break the partial SMILES; try next.
                            continue
                        input_sequence[i] = idx_cur
                        smiles_pred[i] = smi
                        break
                else:
                    # Loop ran to completion without a break: no candidate
                    # token was accepted. Using for/else keeps a successful
                    # termination (state 1) from being overwritten with -1.
                    smiles_state[i] = -1

            index_pred = torch.cat((index_pred, input_sequence), dim=1)
            # Stop early once no sequence is still decoding.
            if 0 not in smiles_state:
                break
        return smiles_pred, index_pred
Beispiel #2
0
def validate_smiles(smiles: str) -> None:
    """Validate that `partialsmiles` can parse the POST's SMILES string.

    The string is parsed as a complete (non-partial) SMILES. PartialSmiles'
    rejection of uncommon valences like `[S]` is ignored, but a syntax error
    is reported; for instance chirality described with `@@(` is rejected:
    `[C@H](...` is valid but `C@@(...` is not.

    Args:
        smiles: The SMILES string.

    Raises:
        ValidationError: If the SMILES string cannot be interpreted according
            to the SMILES dialect used by `partialsmiles`. The message embeds
            the text of the underlying parser error, and the original
            exception is attached as the cause.

    Any other exception raised by the parser is not handled here and
    propagates to the caller unchanged.
    """
    try:
        ps.ParseSmiles(smiles, partial=False)
    except ps.ValenceError:
        # Unusual valences are deliberately tolerated.
        pass
    except ps.SMILESSyntaxError as e:
        # Chain the parser error so the root cause stays in the traceback.
        raise ValidationError(f"Structure is not in SMILES format: {e}") from e
Beispiel #3
0
# Command-line entry point: parse the SMILES given as the single argument,
# first with the default ParseSmiles call and then with partial=True, and
# report the outcome of each attempt on stdout.
if __name__ == "__main__":

    # Exactly one argument is required; otherwise show the help text and
    # exit with status 1.
    if len(sys.argv) != 2:
        help_text()
        sys.exit(1)

    smi = sys.argv[1]

    print("Input: %s" % smi)


    # Step 1: parse with ParseSmiles' default arguments (reported to the
    # user as "full SMILES"). full_status stays PASS unless a ps.Error occurs.
    print("\n1. Parse as full SMILES:\n")
    full_status = PASS
    try:
        ps.ParseSmiles(smi)
    except ps.Error as e:
        # Show the library's error message between <<< and >>> markers.
        print("FAIL\n<<<\n" + str(e) + "\n>>>")
        full_status = FAIL
    except Exception:
        # Anything that is not a ps.Error is re-raised unchanged.
        raise
    else:
        print("OK")

    # Step 2: parse again with partial=True. partial_status stays PASS
    # unless a ps.Error occurs.
    print("\n2. Parse as partial SMILES:\n")
    partial_status = PASS
    try:
        ps.ParseSmiles(smi, partial=True)
    except ps.Error as e:
        # Show the library's error message between <<< and >>> markers.
        print("FAIL\n<<<\n" + str(e) + "\n>>>")
        partial_status = FAIL
            for j in range(1, len(smiB) - 1):
                yield smiA[:i] + smiB[j:]
                yield smiB[:j] + smiA[i:]


if __name__ == "__main__":
    fname = sys.argv[1]
    for smi in mutants(sys.argv[1]):
        try:
            # Does it fail for a substring, but then pass for a
            # longer string? (Or the whole treated as a complete string?)
            substring_failed = False
            for i in range(1, len(smi) + 1):
                sub = smi[:i]
                try:
                    ps.ParseSmiles(sub, partial=True)
                except ps.Error:
                    substring_failed = True
                else:
                    if substring_failed:
                        print("ERROR!")
                        print(smi)
                        continue

            try:
                ps.ParseSmiles(smi, partial=False)
            except ps.Error:
                pass
            else:
                if substring_failed:
                    print("ERROR!")