def __init__(
    self,
    nlp: Language = None,
    name: str = "scispacy_linker",
    candidate_generator: CandidateGenerator = None,
    resolve_abbreviations: bool = True,
    k: int = 30,
    threshold: float = 0.7,
    no_definition_threshold: float = 0.95,
    filter_for_definitions: bool = True,
    max_entities_per_mention: int = 5,
    linker_name: str = None,
):
    """Initialise the linker pipeline component.

    Registers the span extensions the linker writes to, builds (or adopts)
    a candidate generator, and stores the linking hyperparameters on the
    instance for use at call time.
    """
    # TODO(Mark): Remove in scispacy v1.0.
    Span.set_extension("umls_ents", default=[], force=True)
    Span.set_extension("kb_ents", default=[], force=True)

    # Fall back to a generator for the named knowledge base when the
    # caller did not supply one.
    if not candidate_generator:
        candidate_generator = CandidateGenerator(name=linker_name)
    self.candidate_generator = candidate_generator

    # Linking hyperparameters.
    self.resolve_abbreviations = resolve_abbreviations
    self.k = k
    self.threshold = threshold
    self.no_definition_threshold = no_definition_threshold
    self.filter_for_definitions = filter_for_definitions
    self.max_entities_per_mention = max_entities_per_mention

    # The knowledge base is owned by the candidate generator.
    self.kb = self.candidate_generator.kb
    # TODO(Mark): Remove in scispacy v1.0. This is for backward compatability only.
    self.umls = self.kb
def test_candidate_generation(self):
    """Candidate generation returns ranked concept ids, exact-match
    candidates with similarity 1.0, and empty lists for zero vectors."""
    fixture_kb = UmlsKnowledgeBase("tests/fixtures/umls_test_fixture.json")
    with tempfile.TemporaryDirectory() as tmp_dir:
        aliases, vectorizer, index = create_tfidf_ann_index(tmp_dir, fixture_kb)
        generator = CandidateGenerator(index, vectorizer, aliases, fixture_kb)

        hits = generator(['(131)I-Macroaggregated Albumin'], 10)
        found_ids = [candidate.concept_id for candidate in hits[0]]
        assert found_ids == ['C0000005', 'C0000102', 'C0000084']

        # The mention was an exact match, so should have a distance of zero to a concept:
        expected = MentionCandidate(
            concept_id='C0000005',
            aliases=['(131)I-Macroaggregated Albumin'],
            similarities=[1.0],
        )
        assert hits[0][0] == expected

        # Test we don't crash with zero vectors
        assert generator(['ZZZZ'], 10) == [[]]
def test_empty_list(self):
    """An empty batch of mentions yields an empty list of results."""
    fixture_kb = UmlsKnowledgeBase("tests/fixtures/umls_test_fixture.json")
    with tempfile.TemporaryDirectory() as tmp_dir:
        aliases, vectorizer, index = create_tfidf_ann_index(tmp_dir, fixture_kb)
        generator = CandidateGenerator(index, vectorizer, aliases, fixture_kb)
        assert generator([], 10) == []
def setUp(self):
    """Build a spaCy pipeline and a UMLS entity linker over test fixtures."""
    super().setUp()
    self.nlp = spacy.load("en_core_web_sm")
    fixture_kb = UmlsKnowledgeBase(
        "tests/fixtures/umls_test_fixture.json",
        "tests/fixtures/test_umls_tree.tsv",
    )
    with tempfile.TemporaryDirectory() as tmp_dir:
        aliases, vectorizer, index = create_tfidf_ann_index(tmp_dir, fixture_kb)
        generator = CandidateGenerator(index, vectorizer, aliases, fixture_kb)
        # Definition filtering is disabled so the small fixture KB still links.
        self.linker = UmlsEntityLinker(generator, filter_for_definitions=False)
def __init__(self,
             candidate_generator: CandidateGenerator = None,
             resolve_abbreviations: bool = True,
             k: int = 30,
             threshold: float = 0.7,
             filter_for_definitions: bool = True,
             max_entities_per_mention: int = 5):
    """Initialise the legacy UMLS linker: register the span extension,
    adopt or build a candidate generator, and store hyperparameters."""
    Span.set_extension("umls_ents", default=[], force=True)

    # Use the supplied generator, or construct a default one.
    if not candidate_generator:
        candidate_generator = CandidateGenerator()
    self.candidate_generator = candidate_generator

    self.resolve_abbreviations = resolve_abbreviations
    self.k = k
    self.threshold = threshold
    self.filter_for_definitions = filter_for_definitions
    self.max_entities_per_mention = max_entities_per_mention

    # The UMLS knowledge base comes from the candidate generator.
    self.umls = self.candidate_generator.umls
def init_umls_nlp_linker(base_dir: str = ''):
    """Build a UMLS entity linker from serialized artifacts and add it to
    the (module-level) `nlp` pipeline.

    Parameters
    ----------
    base_dir : str, default ''
        Directory/prefix where the artifact files live. Defaults to the
        current directory for backward compatibility.

    Returns
    -------
    The constructed ``UmlsEntityLinker``, after it has been added to ``nlp``.
    """
    tfidf_path = base_dir + 'tfidf_vectors_sparse.npz'
    ann_path = base_dir + 'nmslib_index.bin'
    # NOTE(review): cached_path is applied to the vectorizer/aliases paths
    # below but not to these two — confirm this asymmetry is intentional.
    ann_index = load_approximate_nearest_neighbours_index(
        tfidf_vectors_path=tfidf_path, ann_index_path=ann_path)
    vec = joblib.load(cached_path(base_dir + 'tfidf_vectorizer.joblib'))
    # Close the file handle deterministically instead of leaking it.
    with open(cached_path(base_dir + 'concept_aliases.json')) as aliases_file:
        ann_concept = json.load(aliases_file)
    umlsknowlegebase = UmlsKnowledgeBase(
        file_path=base_dir + 'umls_2017_aa_cat0129.json',
        types_file_path=base_dir + 'umls_semantic_type_tree.tsv')
    cg = CandidateGenerator(ann_index=ann_index,
                            tfidf_vectorizer=vec,
                            ann_concept_aliases_list=ann_concept,
                            umls=umlsknowlegebase)
    linker = UmlsEntityLinker(candidate_generator=cg,
                              max_entities_per_mention=1)
    # NOTE(review): `nlp` is not defined in this function — presumably a
    # module-level spaCy pipeline; verify it exists before this is called.
    nlp.add_pipe(linker)
    return linker
import os
from multiprocessing import Pool
import multiprocessing as multi
import pickle
import scispacy
from scispacy.linking import EntityLinker
from spacy.symbols import ORTH
import time
import re
from spacy.language import Language
import pdb
import copy
from tqdm import tqdm
from scispacy.candidate_generation import CandidateGenerator

# Module-level MeSH candidate generator shared by the functions below.
# NOTE(review): "Genrator" is a typo kept for compatibility — other code in
# this file may reference this name.
MeshCandidateGenrator = CandidateGenerator(name='mesh')
# The knowledge base backing the generator.
KB = MeshCandidateGenrator.kb
# Linking hyperparameters used by this script.
K = 100  # number of nearest-neighbour candidates requested per mention
Resolve_abbreviations = True
Threshold = 0.3
No_definition_threshold = 0.95
Filter_for_definitions = True
Max_entities_per_mention = 30


def candidate_dui_generator(mention_strings):
    """For each mention string, generate MeSH candidates and score them.

    NOTE(review): this function appears truncated in the visible chunk —
    the loop body continues past this excerpt; the sorting/return logic
    presumably follows.
    """
    # Batch query: one candidate list per mention string, K candidates each.
    batch_candidates = MeshCandidateGenrator(mention_strings, K)
    batched_sorted_candidates = list()
    for candidates in batch_candidates:
        predicted = []
        for cand in candidates:
            # Best similarity across this candidate's matched aliases.
            score = max(cand.similarities)