Example 1
    def process(self, text):
        """Tag *text* with the sequence model and extract labeled entity spans.

        Args:
            text: an indexable sequence of characters (e.g. a string) to tag.

        Returns:
            A list of ``(entity_text, start_index_as_str, tag_name)`` tuples,
            where ``tag_name`` is the suffix after the last ``'_'`` of the
            predicted B-tag.
        """
        with torch.no_grad():
            # Map each character to its vocab id, falling back to [UNK].
            x = [bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text]
            x_len = torch.tensor([len(x)], dtype=torch.long, device=device)
            x = torch.tensor([x], dtype=torch.long, device=device)

            # Model returns per-character tag indices for the single batch item.
            pred_tags = self.model(x, x_len)[0]
            pred_tags = [tag_list[t] for t in pred_tags]

        result = []
        # Each 'B' tag opens an entity; extend it until 'O' or the next 'B'.
        for i in (idx for idx, tag in enumerate(pred_tags) if 'B' in tag):
            entity = text[i]
            j = i + 1
            while j < len(pred_tags) and pred_tags[j] != 'O' and 'B' not in pred_tags[j]:
                entity += text[j]
                j += 1
            result.append((entity, str(i), pred_tags[i].split('_')[-1]))

        return result
Example 2
    def process(self, text):
        """Run the BERT tagger on *text* and decode entities via Viterbi.

        Returns a list of ``(entity_text, start_index_as_str, tag_name)``
        tuples; additionally always reports '体检' as a diagnosis if present.
        """
        token_ids = [bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text]
        attn_mask = [1] * len(token_ids)

        token_ids = torch.tensor([token_ids], dtype=torch.long, device=device)
        attn_mask = torch.tensor([attn_mask], dtype=torch.long, device=device)
        seg_ids = torch.zeros(*token_ids.size(), dtype=torch.long, device=device)

        with torch.no_grad():
            logits = self.model(token_ids, attn_mask, seg_ids)
            probs = torch.softmax(logits, dim=-1)[0, :].detach().cpu().numpy()

        # One {tag_id_str: prob} dict per character, then Viterbi decoding.
        nodes = [dict(zip(map(str, range(9)), row)) for row in probs]
        tags = viterbi(nodes)

        result = []
        # Tag ids 1/3/5/7 open an entity; 2/4/6/8 continue it.
        for m in re.finditer('(12+)|(34+)|(56+)|(78+)', tags):
            start, end = m.start(), m.end()
            span = ''.join(text[start:end])
            result.append((span, str(start),
                           tag_list[int(m.group()[0])].split('_')[-1]))
        if '体检' in text:
            result.append(('体检', str(text.index('体检')), 'diagnosis'))

        return result
Example 3
    def __iter__(self):
        """Yield shuffled batches of ``[X, S, X_MASK, X_SEG]`` long tensors."""
        order = list(range(len(self.data)))
        np.random.shuffle(order)

        batch_x, batch_s, batch_mask = [], [], []
        last = order[-1] if order else None
        for idx in order:
            text, label = self.data[idx]

            # Character ids (with [UNK] fallback), mask of ones, tag ids.
            batch_x.append([bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text])
            batch_mask.append([1] * len(text))
            batch_s.append([tag_dictionary[l] for l in label])

            # Flush on a full batch, or on the final (possibly short) one.
            if len(batch_x) == self.batch_size or idx == last:
                x = torch.tensor(seq_padding(batch_x), dtype=torch.long)
                s = torch.tensor(seq_padding(batch_s), dtype=torch.long)
                mask = torch.tensor(seq_padding(batch_mask), dtype=torch.long)
                seg = torch.zeros(*x.size(), dtype=torch.long)

                yield [x, s, mask, seg]
                batch_x, batch_s, batch_mask = [], [], []
Example 4
def extract_items(text_in):
    """Decode entity spans from *text_in* with the subject model + Viterbi.

    Returns a list of ``(entity_text, start_index_as_str, tag_name)`` tuples.
    """
    ids = [bert_vocab.get(ch, bert_vocab.get('[UNK]')) for ch in text_in]
    mask = [1] * len(ids)

    ids = torch.tensor([ids], dtype=torch.long, device=device)
    mask = torch.tensor([mask], dtype=torch.long, device=device)
    seg = torch.zeros(*ids.size(), dtype=torch.long, device=device)

    with torch.no_grad():
        # NOTE(review): argument order here is (ids, seg, mask) — verify it
        # matches the subject_model signature; other call sites pass the
        # mask before the segment ids.
        out = subject_model(ids, seg, mask)
        probs = torch.softmax(out, dim=-1)[0, :].detach().cpu().numpy()

    # One {tag_id_str: prob} dict per character, decoded with Viterbi.
    nodes = [dict(zip(list(map(str, range(9))), row)) for row in probs]
    tags = viterbi(nodes)

    extracted = []
    # Tag ids 1/3/5/7 open an entity; 2/4/6/8 continue it.
    for m in re.finditer('(12+)|(34+)|(56+)|(78+)', tags):
        piece = ''.join(text_in[m.start():m.end()])
        extracted.append(
            (piece, str(m.start()), tag_list[int(m.group()[0])].split('_')[-1]))

    return extracted
Example 5
    T = []
    B_idx = [i for i, l in enumerate(mention) if 'B' in l]
    for i in B_idx:
        e = text[i]
        e_n = mention[i]
        j = i + 1
        while j < len(mention):
            if mention[j] == 'O' or 'B' in mention[j]:
                break
            e += text[j]
            j += 1
        T.append((e, str(i), e_n.split('_')[-1]))
        # T.append((e, str(i)))

    with torch.no_grad():
        _X = [bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text]
        max_len = len(_X)

        _X_Len = torch.tensor([len(_X)], dtype=torch.long, device=device)
        _X = torch.tensor([_X], dtype=torch.long, device=device)

        pred_tags = model(_X, _X_Len)[0]
        pred_tags = [tag_list[_] for _ in pred_tags]

    R = []
    pred_B_idx = [i for i, l in enumerate(pred_tags) if 'B' in l]
    for i in pred_B_idx:
        e = text[i]
        e_n = pred_tags[i]
        j = i + 1
        while j < len(pred_tags):
    T = []
    B_idx = [i for i, l in enumerate(mention) if 'B' in l]
    for i in B_idx:
        e = text[i]
        e_n = mention[i]
        j = i + 1
        while j < len(mention):
            if mention[j] == 'O' or 'B' in mention[j]:
                break
            e += text[j]
            j += 1
        # T.append((e, str(i), e_n.split('_')[-1]))
        T.append((e, str(i)))

    x_ids = [bert_vocab.get(c, bert_vocab.get('[UNK]')) for c in text]
    x_mask = [1] * len(x_ids)

    x_ids = torch.tensor([x_ids], dtype=torch.long, device=device)
    x_mask = torch.tensor([x_mask], dtype=torch.long, device=device)
    x_seg = torch.zeros(*x_ids.size(), dtype=torch.long, device=device)

    with torch.no_grad():
        try:

            k = model(x_ids, x_mask, x_seg)
            k = torch.softmax(k, dim=-1)
            kk = k[0, :].detach().cpu().numpy()
        except Exception:
            print(f'text: {text}, k:{k}')