예제 #1
0
def spacy_doc():
    """Parse a short news excerpt into a spaCy Doc for use as a test fixture."""
    nlp = cache.load_spacy('en')
    text = """
    Two weeks ago, I was in Kuwait participating in an I.M.F. seminar for Arab educators. For 30 minutes, we discussed the impact of technology trends on education in the Middle East. And then an Egyptian education official raised his hand and asked if he could ask me a personal question: "I heard Donald Trump say we need to close mosques in the United States," he said with great sorrow. "Is that what we want our kids to learn?"
    """
    return nlp(text.strip())
예제 #2
0
 def test_cache(self):
     """Loading two different resources should populate the LRU cache."""
     cache.load_hyphenator(lang='en')
     cache.load_spacy('en')
     n_cached = len(cache.LRU_CACHE.keys())
     self.assertTrue(n_cached >= 2)
     # low size threshold, but it would still fail if the size of loaded
     # data were not being correctly assessed by the cache
     self.assertTrue(cache.LRU_CACHE.currsize >= 1000)
예제 #3
0
def test_cache_clear():
    """Clearing and reloading should repopulate the LRU cache."""
    cache.clear()
    cache.load_hyphenator(lang="en")
    cache.load_spacy("en")
    assert len(cache.LRU_CACHE.keys()) >= 2
    # low size threshold, but it would still fail if the size of loaded
    # data were not being correctly assessed by the cache
    assert cache.LRU_CACHE.currsize >= 1000
예제 #4
0
 def setUp(self):
     """Parse a small fixed text into the shared spaCy Doc fixture."""
     nlp = cache.load_spacy('en')
     text = """
     The unit tests aren't going well.
     I love Python, but I don't love backwards incompatibilities.
     No programmers were permanently damaged for textacy's sake.
     Thank God for Stack Overflow."""
     self.spacy_doc = nlp(text.strip())
예제 #5
0
def spacy_lang():
    """Yield an 'en' pipeline with a TextStatsComponent temporarily installed."""
    lang = cache.load_spacy('en')
    lang.add_pipe(spacier.components.TextStatsComponent(), after='parser')

    yield lang

    # teardown: drop the component so other tests see a clean pipeline
    lang.remove_pipe('textacy_text_stats')
예제 #6
0
def spacy_lang():
    """Yield an 'en' pipeline with a TextStatsComponent temporarily installed."""
    lang = cache.load_spacy("en")
    lang.add_pipe(components.TextStatsComponent(), after="parser")

    yield lang

    # teardown: drop the component so other tests see a clean pipeline
    lang.remove_pipe("textacy_text_stats")
예제 #7
0
    def setUp(self):
        """Build the shared spaCy Doc fixture: a multi-paragraph biography of
        Thomas Friedman, preprocessed and parsed with the parser disabled.
        """
        spacy_lang = cache.load_spacy('en')
        text = """
        Friedman joined the London bureau of United Press International after completing his master's degree. He was dispatched a year later to Beirut, where he lived from June 1979 to May 1981 while covering the Lebanon Civil War. He was hired by The New York Times as a reporter in 1981 and re-dispatched to Beirut at the start of the 1982 Israeli invasion of Lebanon. His coverage of the war, particularly the Sabra and Shatila massacre, won him the Pulitzer Prize for International Reporting (shared with Loren Jenkins of The Washington Post). Alongside David K. Shipler he also won the George Polk Award for foreign reporting.

        In June 1984, Friedman was transferred to Jerusalem, where he served as the New York Times Jerusalem Bureau Chief until February 1988. That year he received a second Pulitzer Prize for International Reporting, which cited his coverage of the First Palestinian Intifada. He wrote a book, From Beirut to Jerusalem, describing his experiences in the Middle East, which won the 1989 U.S. National Book Award for Nonfiction.

        Friedman covered Secretary of State James Baker during the administration of President George H. W. Bush. Following the election of Bill Clinton in 1992, Friedman became the White House correspondent for the New York Times. In 1994, he began to write more about foreign policy and economics, and moved to the op-ed page of The New York Times the following year as a foreign affairs columnist. In 2002, Friedman won the Pulitzer Prize for Commentary for his "clarity of vision, based on extensive reporting, in commenting on the worldwide impact of the terrorist threat."

        In February 2002, Friedman met Saudi Crown Prince Abdullah and encouraged him to make a comprehensive attempt to end the Arab-Israeli conflict by normalizing Arab relations with Israel in exchange for the return of refugees alongside an end to the Israel territorial occupations. Abdullah proposed the Arab Peace Initiative at the Beirut Summit that March, which Friedman has since strongly supported.

        Friedman received the 2004 Overseas Press Club Award for lifetime achievement and was named to the Order of the British Empire by Queen Elizabeth II.

        In May 2011, The New York Times reported that President Barack Obama "has sounded out" Friedman concerning Middle East issues.
        """
        # parser disabled — presumably these tests don't need dependency parses;
        # preprocess_text is a project helper applied before tokenization
        self.spacy_doc = spacy_lang(preprocess_text(text), disable=['parser'])
예제 #8
0
 def setUp(self):
     """Parse a fixed two-sentence text, then overwrite its TAG/HEAD/DEP token
     annotations with hard-coded values so tests don't depend on model output.
     """
     text = "I would have lived in peace. But my enemies brought me war."
     spacy_lang = cache.load_spacy('en')
     self.spacy_doc = spacy_lang(text)
     # token-array columns to overwrite on the parsed doc
     cols = [attrs.TAG, attrs.HEAD, attrs.DEP]
     # one row per token: [tag hash, head offset, dep hash]; values near
     # 2**64 are presumably negative head offsets wrapped into uint64 —
     # TODO confirm against spacy's Doc.from_array conventions
     values = np.array(
         [[13656873538139661788, 3, 426], [16235386156175103506, 2, 402],
          [14200088355797579614, 1, 402],
          [3822385049556375858, 0, 8206900633647566924],
          [1292078113972184607, 18446744073709551615, 440],
          [15308085513773655218, 18446744073709551615, 436],
          [12646065887601541794, 18446744073709551613, 442],
          [17571114184892886314, 3, 404], [4062917326063685704, 1, 437],
          [783433942507015291, 1, 426],
          [17109001835818727656, 0, 8206900633647566924],
          [13656873538139661788, 18446744073709551615, 3965108062993911700],
          [15308085513773655218, 18446744073709551614, 413],
          [12646065887601541794, 18446744073709551613, 442]],
         dtype='uint64')
     self.spacy_doc.from_array(cols, values)
예제 #9
0
 def setUp(self):
     """Parse a fixed three-sentence text, overwrite its TAG/HEAD/DEP token
     annotations with hard-coded values, and create a temp dir for file I/O tests.
     """
     self.text = "The year was 2081, and everybody was finally equal. They weren't only equal before God and the law. They were equal every which way."
     self.spacy_lang = cache.load_spacy('en')
     self.spacy_doc = self.spacy_lang(self.text)
     # token-array columns to overwrite on the parsed doc
     cols = [attrs.TAG, attrs.HEAD, attrs.DEP]
     # one row per token: [tag hash, head offset, dep hash]; values near
     # 2**64 are presumably negative head offsets wrapped into uint64 —
     # TODO confirm against spacy's Doc.from_array conventions
     values = np.array(
         [[15267657372422890137, 1, 412], [15308085513773655218, 1, 426],
          [17109001835818727656, 0, 8206900633647566924],
          [8427216679587749980, 18446744073709551615, 401],
          [2593208677638477497, 18446744073709551614, 442],
          [17571114184892886314, 18446744073709551613, 404],
          [15308085513773655218, 1, 426],
          [17109001835818727656, 18446744073709551611, 407],
          [164681854541413346, 18446744073709551615, 397],
          [10554686591937588953, 18446744073709551614, 395],
          [12646065887601541794, 18446744073709551613, 442],
          [13656873538139661788, 1, 426],
          [17109001835818727656, 0, 8206900633647566924],
          [164681854541413346, 18446744073709551615, 422],
          [164681854541413346, 1, 397],
          [10554686591937588953, 18446744073709551613, 395],
          [1292078113972184607, 18446744073709551615, 440],
          [15794550382381185553, 18446744073709551615, 436],
          [17571114184892886314, 18446744073709551615, 404],
          [15267657372422890137, 1, 412],
          [15308085513773655218, 18446744073709551613, 407],
          [12646065887601541794, 18446744073709551607, 442],
          [13656873538139661788, 1, 426],
          [17109001835818727656, 0, 8206900633647566924],
          [10554686591937588953, 18446744073709551615, 395],
          [15267657372422890137, 2, 13323405159917154080],
          [17202369883303991778, 1, 412],
          [15308085513773655218, 18446744073709551612, 425],
          [12646065887601541794, 18446744073709551611, 442]],
         dtype='uint64')
     self.spacy_doc.from_array(cols, values)
     # scratch directory alongside this test file for fileio outputs
     self.tempdir = tempfile.mkdtemp(prefix='test_fileio',
                                     dir=os.path.dirname(
                                         os.path.abspath(__file__)))
     self.tests_dir = os.path.split(__file__)[0]
     # show full diffs on assertion failures
     self.maxDiff = None
예제 #10
0
def spacy_doc():
    """Parse the module-level TEXT and overwrite its TAG/HEAD/DEP token
    annotations with hard-coded values so tests don't depend on model output.
    """
    spacy_lang = cache.load_spacy('en')
    spacy_doc = spacy_lang(TEXT)
    # token-array columns to overwrite on the parsed doc
    cols = [attrs.TAG, attrs.HEAD, attrs.DEP]
    # one row per token: [tag hash, head offset, dep hash]; values near
    # 2**64 are presumably negative head offsets wrapped into uint64 —
    # TODO confirm against spacy's Doc.from_array conventions
    values = np.array(
        [[15267657372422890137, 1, 412],
         [15308085513773655218, 1, 426],
         [17109001835818727656, 0, 8206900633647566924],
         [8427216679587749980, 18446744073709551615, 401],
         [2593208677638477497, 18446744073709551614, 442],
         [17571114184892886314, 18446744073709551613, 404],
         [15308085513773655218, 1, 426],
         [17109001835818727656, 18446744073709551611, 407],
         [164681854541413346, 18446744073709551615, 397],
         [10554686591937588953, 18446744073709551614, 395],
         [12646065887601541794, 18446744073709551613, 442],
         [13656873538139661788, 1, 426],
         [17109001835818727656, 0, 8206900633647566924],
         [164681854541413346, 18446744073709551615, 422],
         [164681854541413346, 1, 397],
         [10554686591937588953, 18446744073709551613, 395],
         [1292078113972184607, 18446744073709551615, 440],
         [15794550382381185553, 18446744073709551615, 436],
         [17571114184892886314, 18446744073709551615, 404],
         [15267657372422890137, 1, 412],
         [15308085513773655218, 18446744073709551613, 407],
         [12646065887601541794, 18446744073709551607, 442],
         [13656873538139661788, 1, 426],
         [17109001835818727656, 0, 8206900633647566924],
         [10554686591937588953, 18446744073709551615, 395],
         [15267657372422890137, 2, 13323405159917154080],
         [17202369883303991778, 1, 412],
         [15308085513773655218, 18446744073709551612, 425],
         [12646065887601541794, 18446744073709551611, 442]],
        dtype='uint64')
    spacy_doc.from_array(cols, values)
    return spacy_doc
예제 #11
0
def test_make_doc_from_text_chunks():
    """Doc construction should work with either a lang string or a loaded pipeline."""
    text = "Burton forgot to add tests for this function."
    langs = ("en", cache.load_spacy("en"))
    for lang in langs:
        doc = utils.make_doc_from_text_chunks(text, lang)
        assert isinstance(doc, SpacyDoc)
        assert doc.text == text
예제 #12
0
def test_load_spacy():
    """load_spacy should return a Language for short/full names, with or without disable."""
    disables = (None, ("parser", "ner"))
    for lang in ("en", "en_core_web_sm"):
        for disable in disables:
            model = cache.load_spacy(lang, disable=disable)
            assert isinstance(model, spacy.language.Language)
예제 #13
0
def test_load_spacy_hashability():
    """An unhashable (list) ``disable`` argument should raise TypeError."""
    unhashable_disable = ["tagger", "parser", "ner"]
    with pytest.raises(TypeError):
        cache.load_spacy("en", disable=unhashable_disable)
예제 #14
0
def test_corpus_init_lang():
    """Corpus accepts a lang string or loaded pipeline; rejects bytes and None."""
    for good_lang in ('en', cache.load_spacy('en')):
        assert isinstance(Corpus(good_lang), Corpus)
    for bad_lang in (b'en', None):
        with pytest.raises(TypeError):
            Corpus(bad_lang)
예제 #15
0
 def test_invalid_content_lang_combo(self):
     """A doc parsed by an 'en' pipeline must be rejected when lang='es'."""
     en = cache.load_spacy('en')
     with self.assertRaises(ValueError):
         Doc(en('Hola, cómo estás mi amigo?'), lang='es')
예제 #16
0
def test_invalid_content_lang_combo():
    """A doc parsed by an 'en' pipeline must be rejected when lang='es'."""
    en = cache.load_spacy("en")
    with pytest.raises(ValueError):
        Doc(en("Hola, cómo estás mi amigo?"), lang="es")
예제 #17
0
def corpus():
    """Build a small Corpus fixture from 10 Bernie Sanders records."""
    records = DATASET.records(speaker_name={'Bernie Sanders'}, limit=10)
    texts, metadatas = io.split_records(records, 'text')
    return Corpus(cache.load_spacy('en'), texts=texts, metadatas=metadatas)
예제 #18
0
def test_load_spacy():
    """load_spacy should return a Language for short/full names, with or without disable."""
    disables = (None, ('parser', 'ner'))
    for lang in ('en', 'en_core_web_sm'):
        for disable in disables:
            model = cache.load_spacy(lang, disable=disable)
            assert isinstance(model, spacy.language.Language)
예제 #19
0
def test_lang_spacylang():
    """A preloaded spacy pipeline may be passed as the ``lang`` argument."""
    en = cache.load_spacy('en')
    doc = Doc('This is an English sentence.', lang=en)
    assert isinstance(doc, Doc)
예제 #20
0
def test_load_spacy_hashability():
    """An unhashable (list) ``disable`` argument should raise TypeError."""
    unhashable_disable = ['tagger', 'parser', 'ner']
    with pytest.raises(TypeError):
        cache.load_spacy('en', disable=unhashable_disable)
예제 #21
0
def test_corpus_init_no_parser():
    """With the parser disabled, sentence counts are unavailable but docs still load."""
    nlp = cache.load_spacy('en', disable=('parser', ))
    docs = (nlp('This is a sentence in a doc.'), )
    corpus = Corpus(nlp, docs=docs)
    assert corpus.n_sents is None
    assert len(corpus) == 1
예제 #22
0
 def test_load_spacy(self):
     """Smoke test: load_spacy succeeds for each lang/disable combination."""
     langs = ('en', 'en_core_web_sm')
     disables = (None, ('parser', 'ner'))
     for lang in langs:
         for disable in disables:
             cache.load_spacy(lang, disable=disable)
예제 #23
0
def spacy_doc():
    """Parse a fixed two-sentence text into a spaCy Doc fixture."""
    nlp = cache.load_spacy("en")
    return nlp("I would have lived in peace. But my enemies brought me war.")
예제 #24
0
def test_lang_spacylang():
    """A preloaded spacy pipeline may be passed as the ``lang`` argument."""
    en = cache.load_spacy("en")
    doc = Doc("This is an English sentence.", lang=en)
    assert isinstance(doc, Doc)
예제 #25
0
 def test_spacydoc_content(self):
     """A pre-parsed spaCy doc may be wrapped directly in a textacy Doc."""
     nlp = cache.load_spacy('en')
     parsed = nlp('This is an English sentence.')
     self.assertIsInstance(Doc(parsed), Doc)
예제 #26
0
 def test_load_spacy_hashability(self):
     """An unhashable (list) ``disable`` argument should raise TypeError."""
     unhashable_disable = ['tagger', 'parser', 'ner']
     with self.assertRaises(TypeError):
         cache.load_spacy('en', disable=unhashable_disable)
예제 #27
0
def doc(text):
    """Wrap the stripped ``text`` in a textacy Doc using the cached 'en' pipeline."""
    en = cache.load_spacy('en')
    stripped = text.strip()
    return Doc(stripped, lang=en)
예제 #28
0
 def test_lang_spacylang(self):
     """A preloaded spacy pipeline may be passed as the ``lang`` argument."""
     en = cache.load_spacy('en')
     result = Doc('This is an English sentence.', lang=en)
     self.assertIsInstance(result, Doc)
예제 #29
0
def test_invalid_content_lang_combo():
    """A doc parsed by an 'en' pipeline must be rejected when lang='es'."""
    en = cache.load_spacy('en')
    with pytest.raises(ValueError):
        Doc(en('Hola, cómo estás mi amigo?'), lang='es')
예제 #30
0
def test_spacydoc_content():
    """A pre-parsed spaCy doc may be wrapped directly in a textacy Doc."""
    nlp = cache.load_spacy("en")
    parsed = nlp("This is an English sentence.")
    assert isinstance(Doc(parsed), Doc)