Esempi in Python per Lookups.add_table, esempi in Python per spacy.lookups.Lookups.add_table

Esempio n. 1

0

Mostra file

File: model.py Progetto: wd6/concept-tagging-training

    def concept_sets(self, value):
        """
        Store the per-document concept sets and rebuild everything derived from them.

        Args:
            value (list of list of str): One inner collection of concept strings per
                document. Strings that are empty after stripping whitespace are skipped.

        Side effects:
            Assigns ``_concept_sets``, ``raw2lemma``, ``lemmatizer``, ``lemma2raw``,
            ``concepts_frequencies`` and ``concepts``, then calls
            ``_fit_concept_indices``.
        """
        self._concept_sets = value

        LOG.debug("Extracting raw keywords as concepts.")
        all_concepts = []
        for document_concepts in tqdm(self._concept_sets):
            for keyword in document_concepts:
                if keyword.strip() == "":
                    continue
                all_concepts.append(keyword)
        raw_concepts = set(all_concepts)

        LOG.debug("Lemmatizing {} raw concepts.".format(len(raw_concepts)))
        # The "lemma" of a raw concept is its lower-cased form.
        self.raw2lemma = {raw: raw.lower() for raw in raw_concepts}

        lemma_tables = Lookups()
        lemma_tables.add_table("lemma_lookup", self.raw2lemma)
        self.lemmatizer = Lemmatizer(lemma_tables)

        # If several raw concepts share a lemma, the last one iterated wins.
        self.lemma2raw = {lemma: raw for raw, lemma in self.raw2lemma.items()}

        # Keep duplicates here: the frequencies are counted over every occurrence.
        lemma_concepts = []
        for keyword in all_concepts:
            lemma_concepts.append(self.lemmatizer(keyword, "NOUN")[0])

        self.concepts_frequencies = Counter(lemma_concepts)
        self.concepts = set(lemma_concepts)
        self._fit_concept_indices()

Esempio n. 2

0

Mostra file

File: fi.py Progetto: aajanki/spacy-fi

def create_lookups_from_json_reader(path: Path) -> Lookups:
    """Build a ``Lookups`` with one table per ``*.json`` file directly inside ``path``.

    Each table is named after the file's stem and filled with the parsed JSON content.
    """
    tables = Lookups()
    for json_file in path.glob("*.json"):
        tables.add_table(json_file.stem, srsly.read_json(json_file))
    return tables

Esempio n. 3

0

Mostra file

File: test_lookups.py Progetto: cisco00/Sentimental-Analysis-on-threat

def test_lookups_api():
    """Exercise the Lookups container: add, query, mutate and remove a table."""
    name = "test"
    lookups = Lookups()
    lookups.add_table(name, {"foo": "bar", "hello": "world"})

    # Container-level queries.
    assert len(lookups) == 1
    assert name in lookups
    assert lookups.has_table(name)

    # Item access and assignment on the table itself.
    fetched = lookups.get_table(name)
    assert fetched.name == name
    assert len(fetched) == 2
    assert fetched["hello"] == "world"
    fetched["a"] = "b"
    assert fetched["a"] == "b"

    # Fetching the table again must show the entry added above.
    refetched = lookups.get_table(name)
    assert len(refetched) == 3

    # Expected failures: unknown table name, then re-adding an existing name.
    with pytest.raises(KeyError):
        lookups.get_table("xyz")
    with pytest.raises(ValueError):
        lookups.add_table(name)

    # Removal hands the table back and leaves the container empty.
    removed = lookups.remove_table(name)
    assert removed.name == name
    assert len(lookups) == 0
    assert name not in lookups
    with pytest.raises(KeyError):
        lookups.get_table(name)

Esempio n. 4

0

Mostra file

def lemmatizer():
    """Return a ``Lemmatizer`` whose only table is a three-entry ``lemma_lookup``."""
    lemma_lookup = {"dogs": "dog", "boxen": "box", "mice": "mouse"}
    tables = Lookups()
    tables.add_table("lemma_lookup", lemma_lookup)
    return Lemmatizer(tables)

Esempio n. 5

0

Mostra file

File: test_lemmatizer.py Progetto: mosynaq/spaCy

 def cope_lookups():
     """Return a ``Lookups`` holding the four lemma tables for the verb "cope"."""
     # Tables are registered in this order: lookup, index, exceptions, rules.
     table_contents = (
         ("lemma_lookup", {"cope": "cope", "coped": "cope"}),
         ("lemma_index", {"verb": ("cope", "cop")}),
         ("lemma_exc", {"verb": {"coping": ("cope", )}}),
         ("lemma_rules", {"verb": [["ing", ""]]}),
     )
     tables = Lookups()
     for table_name, table_data in table_contents:
         tables.add_table(table_name, table_data)
     return tables

Esempio n. 6

0

Mostra file

 def make_lookups_bin(self,
                      lookup_name_pattern='lemma_lookup_{}',
                      filename_pattern='it_lemma_lookup_{}.json'):
     """Bundle the per-tag and the generic lemma lookup JSON files into ``lookups.bin``.

     For every key of ``self.tag_map`` a JSON file is read from ``self.out_path``
     and registered as its own table; afterwards ``it_lemma_lookup.json`` is
     registered as the ``lemma_lookup`` table and everything is written back to
     ``self.out_path`` via ``Lookups.to_disk``.

     Args:
         lookup_name_pattern (str): Format string for the table name; receives
             the lower-cased tag-map key.
         filename_pattern (str): Format string for the JSON file name; receives
             the lower-cased tag-map key.

     Raises:
         OSError: If one of the expected JSON files cannot be opened.
         json.JSONDecodeError: If a file does not contain valid JSON.
     """
     lookups = Lookups()
     for lookup_pos in self.tag_map.keys():
         pos_key = lookup_pos.lower()
         json_path = os.path.join(self.out_path,
                                  filename_pattern.format(pos_key))
         # Read as UTF-8 explicitly: lemma data contains non-ASCII characters
         # and the platform default encoding is not UTF-8 everywhere.
         with open(json_path, encoding='utf-8') as json_file:
             lookup_dict = json.load(json_file)
         lookups.add_table(lookup_name_pattern.format(pos_key), lookup_dict)

     # The tag-independent table is stored under the plain "lemma_lookup" name.
     with open(os.path.join(self.out_path, 'it_lemma_lookup.json'),
               encoding='utf-8') as json_file:
         lookup_dict = json.load(json_file)
     lookups.add_table('lemma_lookup', lookup_dict)
     lookups.to_disk(self.out_path, 'lookups.bin')

Esempio n. 7

0

Mostra file

File: test_lemmatizer.py Progetto: cisco00/Sentimental-Analysis-on-threat

def test_lemmatizer_without_is_base_form_implementation():
    """A Lemmatizer built with ``is_base_form=None`` returns the expected noun lemma."""
    # Norwegian example from #5658
    noun_tables = (
        ("lemma_rules", {"noun": []}),
        ("lemma_index", {"noun": {}}),
        ("lemma_exc", {"noun": {"formuesskatten": ["formuesskatt"]}}),
    )
    lookups = Lookups()
    for table_name, table_data in noun_tables:
        lookups.add_table(table_name, table_data)

    lemmatizer = Lemmatizer(lookups, is_base_form=None)
    morphology = {"Definite": "def", "Gender": "masc", "Number": "sing"}
    lemmas = lemmatizer("Formuesskatten", "noun", morphology)
    assert lemmas == ["formuesskatt"]

Esempio n. 8

0

Mostra file

File: test_lookups.py Progetto: cisco00/Sentimental-Analysis-on-threat

def test_lookups_to_from_disk():
    """Round-trip two tables through ``to_disk`` / ``from_disk`` and compare contents."""
    original = Lookups()
    original.add_table("table1", {"foo": "bar", "hello": "world"})
    original.add_table("table2", {"a": 1, "b": 2, "c": 3})

    # The restored container is inspected only after the temp dir is gone.
    with make_tempdir() as scratch_dir:
        original.to_disk(scratch_dir)
        restored = Lookups()
        restored.from_disk(scratch_dir)

    assert len(restored) == 2
    assert "table1" in restored
    assert "table2" in restored

    first = restored.get_table("table1")
    assert len(first) == 2
    assert first["foo"] == "bar"

    second = restored.get_table("table2")
    assert len(second) == 3
    assert second["b"] == 2

Esempio n. 9

0

Mostra file

File: test_lookups.py Progetto: cisco00/Sentimental-Analysis-on-threat

def test_lookups_to_from_bytes():
    """Round-trip two tables through ``to_bytes`` / ``from_bytes`` and compare contents."""
    original = Lookups()
    original.add_table("table1", {"foo": "bar", "hello": "world"})
    original.add_table("table2", {"a": 1, "b": 2, "c": 3})
    serialized = original.to_bytes()

    restored = Lookups()
    restored.from_bytes(serialized)

    assert len(restored) == 2
    assert "table1" in restored
    assert "table2" in restored

    first = restored.get_table("table1")
    assert len(first) == 2
    assert first["foo"] == "bar"

    second = restored.get_table("table2")
    assert len(second) == 3
    assert second["b"] == 2

    # Serializing the restored container must reproduce the same bytes.
    assert restored.to_bytes() == serialized

Esempio n. 10

0

Mostra file

File: test_lookups.py Progetto: yanaiela/spaCy

def test_lookups_api():
    """Exercise the Lookups container with the ``get`` / ``set`` table accessors."""
    name = "test"
    lookups = Lookups()
    lookups.add_table(name, {"foo": "bar", "hello": "world"})

    # Container-level queries.
    assert name in lookups
    assert lookups.has_table(name)

    # Read and write entries through the table's accessor methods.
    fetched = lookups.get_table(name)
    assert fetched.name == name
    assert len(fetched) == 2
    assert fetched.get("hello") == "world"
    fetched.set("a", "b")
    assert fetched.get("a") == "b"

    # Fetching the table again must show the entry added above.
    refetched = lookups.get_table(name)
    assert len(refetched) == 3

    with pytest.raises(KeyError):
        lookups.get_table("xyz")

Esempio n. 11

0

Mostra file

File: model.py Progetto: wd6/concept-tagging-training

    def from_jsons(
        self, in_indices, in_raw2lemma
    ):  # a little strange because it does not fill in all attributes
        """
        Load index and raw2lemma dictionaries into empty ConceptExtractor

        Args:
            in_indices (str or path-like): Path of the JSON file holding the concept
                index mapping. Each value must support ``len()``; that length becomes
                the concept's frequency.
            in_raw2lemma (str or path-like): Path of the JSON file holding the
                raw concept -> lemma mapping.

        Side effects:
            Sets ``concept_index_mapping``, ``raw2lemma``, ``lemmatizer``,
            ``lemma2raw``, ``concepts`` and ``concepts_frequencies``.

        Raises:
            OSError: If either file cannot be opened.
            json.JSONDecodeError: If either file is not valid JSON.
        """
        # Read as UTF-8 explicitly rather than relying on the platform default
        # encoding, so non-ASCII concepts load identically on every system.
        with open(in_indices, "r", encoding="utf-8") as indices_file:
            self.concept_index_mapping = json.load(indices_file)
        with open(in_raw2lemma, "r", encoding="utf-8") as raw2lemma_file:
            self.raw2lemma = json.load(raw2lemma_file)

        lookups = Lookups()
        lookups.add_table("lemma_lookup", self.raw2lemma)
        self.lemmatizer = Lemmatizer(lookups)

        # If several raw concepts share a lemma, the last one iterated wins.
        self.lemma2raw = {v: k for k, v in self.raw2lemma.items()}

        # This is a live key view of the mapping, not an independent copy.
        self.concepts = self.concept_index_mapping.keys()
        tmp_frequencies = {
            concept: len(index) for concept, index in self.concept_index_mapping.items()
        }
        self.concepts_frequencies = Counter(tmp_frequencies)

Esempio n. 12

0

Mostra file

File: test_lemmatizer.py Progetto: mosynaq/spaCy

def test_lemmatizer_init(nlp):
    """Check the lookup-mode lemmatizer before and after initialization.

    The steps depend on each other: every assertion relies on the pipeline
    state left behind by the statements above it.

    Args:
        nlp: Pipeline object supplied by the test fixture of the same name.
    """
    # A freshly added component has a Lookups container but no tables yet.
    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
    assert isinstance(lemmatizer.lookups, Lookups)
    assert not lemmatizer.lookups.tables
    assert lemmatizer.mode == "lookup"
    # Running the pipeline before initialize() is expected to fail.
    with pytest.raises(ValueError):
        nlp("test")
    nlp.initialize()
    # After initialization, tables are present and lemmas are resolved.
    assert lemmatizer.lookups.tables
    assert nlp("cope")[0].lemma_ == "cope"
    assert nlp("coped")[0].lemma_ == "cope"
    # replace any tables from spacy-lookups-data
    lemmatizer.lookups = Lookups()
    # lookup with no tables sets text as lemma
    assert nlp("cope")[0].lemma_ == "cope"
    assert nlp("coped")[0].lemma_ == "coped"
    # Start over with a new component to test explicit initialization.
    nlp.remove_pipe("lemmatizer")
    lemmatizer = nlp.add_pipe("lemmatizer", config={"mode": "lookup"})
    with pytest.raises(ValueError):
        # Can't initialize without required tables
        lemmatizer.initialize(lookups=Lookups())
    # An empty "lemma_lookup" table is enough for initialization to succeed.
    lookups = Lookups()
    lookups.add_table("lemma_lookup", {})
    lemmatizer.initialize(lookups=lookups)

Esempio n. 13

0

Mostra file

File: test_issue1-1000.py Progetto: xettrisomeman/spaCy

def test_issue595():
    """Test lemmatization of base forms"""
    sentence = ["Do", "n't", "feed", "the", "dog"]

    # NOTE(review): these tables are built but never handed to the Vocab or
    # Doc below - confirm whether they are still needed.
    lookups = Lookups()
    for table_name, table_data in (
        ("lemma_rules", {"verb": [["ed", "e"]]}),
        ("lemma_index", {"verb": {}}),
        ("lemma_exc", {"verb": {}}),
    ):
        lookups.add_table(table_name, table_data)

    doc = Doc(Vocab(), words=sentence)
    feed = doc[2]
    feed.tag_ = "VB"
    assert doc[2].text == "feed"
    assert doc[2].lemma_ == "feed"

Esempio n. 14

0

Mostra file

def test_issue1387():
    """The VBG-tagged token "coping" must get the lemma "cope"."""
    verb_tables = (
        ("lemma_index", {"verb": ("cope", "cop")}),
        ("lemma_exc", {"verb": {"coping": ("cope", )}}),
        ("lemma_rules", {"verb": [["ing", ""]]}),
    )
    lookups = Lookups()
    for table_name, table_data in verb_tables:
        lookups.add_table(table_name, table_data)

    tag_map = {"VBG": {POS: VERB, VerbForm_part: True}}
    vocab = Vocab(lemmatizer=Lemmatizer(lookups), tag_map=tag_map)
    doc = Doc(vocab, words=["coping"])
    doc[0].tag_ = "VBG"
    assert doc[0].text == "coping"
    assert doc[0].lemma_ == "cope"

Esempio n. 15

0

Mostra file

File: test_issue1-1000.py Progetto: monasaad/CAPEsFinal

def test_issue595():
    """Test lemmatization of base forms"""
    sentence = ["Do", "n't", "feed", "the", "dog"]
    tag_map = {"VB": {POS: VERB, VerbForm_inf: True}}

    verb_tables = (
        ("lemma_rules", {"verb": [["ed", "e"]]}),
        ("lemma_index", {"verb": {}}),
        ("lemma_exc", {"verb": {}}),
    )
    lookups = Lookups()
    for table_name, table_data in verb_tables:
        lookups.add_table(table_name, table_data)

    vocab = Vocab(lemmatizer=Lemmatizer(lookups), tag_map=tag_map)
    doc = Doc(vocab, words=sentence)
    doc[2].tag_ = "VB"
    # "feed" is already a base form, so the "ed" -> "e" rule must not be applied.
    assert doc[2].text == "feed"
    assert doc[2].lemma_ == "feed"

Esempio n. 16

0

Mostra file

    def lemmatize(self, tokens, toke=False):
        """Lemmatize ``tokens`` with a rule-based ``Lemmatizer``.

        The lemmatizer is rebuilt on every call from the ``lemma_index``,
        ``lemma_exc`` and ``lemma_rules`` objects defined outside this method.

        Args:
            tokens: Iterable of objects exposing ``.text`` and ``.tag_``.
            toke (bool): If true, return the lemmas as a list; otherwise return
                them joined by single spaces.

        Returns:
            list of str when ``toke`` is true, else a single str.
        """
        lookups = Lookups()
        lookups.add_table('lemma_index', lemma_index)
        lookups.add_table('lemma_exc', lemma_exc)
        lookups.add_table('lemma_rules', lemma_rules)
        lemmatizer = Lemmatizer(lookups)

        # Fix 1: the loop used to bind `t` but read the undefined name `token`.
        # Fix 2: the lemmatizer returns a list of candidate lemmas per token;
        # keep the first so the result is a flat list of strings that can be
        # joined below.
        # NOTE(review): the second argument is the fine-grained `tag_`;
        # presumably the lemmatizer expects a coarse POS label - confirm.
        lemmas = [lemmatizer(t.text, t.tag_)[0] for t in tokens]

        if toke:
            return lemmas

        return " ".join(lemmas)

Esempio n. 17

0

Mostra file

File: ner_utils.py Progetto: aixpact-ml/first_azure

def lemmatize():
    """Return a ``Lemmatizer`` whose ``lemma_rules`` table holds one noun rule, ``["s", ""]``."""
    noun_rules = {"noun": [["s", ""]]}
    tables = Lookups()
    tables.add_table("lemma_rules", noun_rules)
    return Lemmatizer(tables)

Esempio n. 18

0

Mostra file

def create_lemmatizer():
    """Build a ``FinnishLemmatizer`` whose lookups contain the ``lemma_exc`` table.

    The table is read from ``lookups/fi_lemma_exc.json``, resolved relative to
    the current working directory.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    lookups = Lookups()
    # Read as UTF-8 explicitly: the platform default encoding is not UTF-8
    # everywhere, which would mangle non-ASCII entries.
    with open("lookups/fi_lemma_exc.json", encoding="utf-8") as f:
        lookups.add_table("lemma_exc", json.load(f))
    return FinnishLemmatizer(lookups)

Esempio n. 19

0

Mostra file

File: 2-svd-nmf-topic-modeling.py Progetto: randomgeek78/course-nlp

# pip install -U spacy
# ```
#
# You will then need to download the English model:
# ```
# python -m spacy download en_core_web_sm
# ```

# %%
import spacy

# %%
from spacy.lemmatizer import Lemmatizer
from spacy.lookups import Lookups
# Build a Lemmatizer from a Lookups container that holds only a "lemma_rules" table.
lookups = Lookups()
lookups.add_table("lemma_rules", {"noun": [["s", ""]]})

lemmatizer = Lemmatizer(lookups)

# %%
# NOTE(review): `word_list` is not defined in this excerpt - presumably set in an earlier cell.
[lemmatizer.lookup(word) for word in word_list]

# %% [markdown]
# spaCy doesn't offer a stemmer (since lemmatization is considered better -- this is an example of being opinionated!)

# %% [markdown]
# Stop words vary from library to library

# %%
nlp = spacy.load("en_core_web_sm")