def _load_data_set(self):
    """Load the spaCy pipeline for the configured language into ``self.nlp_model``.

    The boolean property ``is_english`` in the ``datasource`` section selects
    ``en_core_web_md`` when true and ``nl_core_news_md`` when false.
    """
    # TODO: STAGE-1676 Right now, we load external data sets from spacy. In the future research should be done to
    # check if it's possible to use more advanced, word2vec specific datasets.
    use_english = get_bool_property("datasource", "is_english")
    model_name = 'en_core_web_md' if use_english else 'nl_core_news_md'
    self.nlp_model = spacy.load(model_name)
Esempio n. 2
0
 def _initiate_language_data(self):
     """Load the spaCy pipeline and stop-word set for the configured language.

     Reads the boolean property ``is_english`` from the ``datasource`` section
     and sets two attributes:

     * ``self._nlp_model`` -- ``en_core_web_md`` when the property is true,
       ``nl_core_news_md`` otherwise.
     * ``self._stopwords`` -- the loaded pipeline's default stop words; in the
       Dutch case this is the union of the Dutch and English stop words.
     """
     if get_bool_property('datasource', 'is_english'):
         self._nlp_model = spacy.load("en_core_web_md")
         #  An English dataset is unlikely to contain much Dutch text, so Dutch stopwords are not merged with the
         #  English ones.
         self._stopwords = self._nlp_model.Defaults.stop_words
     else:
         self._nlp_model = spacy.load("nl_core_news_md")
         #  Dutch people love the English language, a lot of our people write in English, so if we're processing
         #  Dutch text, we also filter out the English stopwords.
         #  The stop words live on the language class defaults, so look the class up directly instead of
         #  loading the whole English pipeline from disk a second time just to read them.
         english_stopwords = spacy.util.get_lang_class("en").Defaults.stop_words
         self._stopwords = self._nlp_model.Defaults.stop_words | english_stopwords
Esempio n. 3
0
 def test_get_bool_property_success(self, mocker):
     """Check that ``get_bool_property`` returns ``True`` for the raw value "TrUe".

     ``helper.ConfigReader._read_property`` is patched to return the
     mixed-case string, and the test also verifies that it is called exactly
     once with the section and key passed to ``get_bool_property``.
     """
     # Setup
     raw_value = "TrUe"
     expected_result = True
     reader_patch = mocker.patch("helper.ConfigReader._read_property", return_value=raw_value)
     # Run
     result = get_bool_property("test", "parameters")
     # Check
     assert result == expected_result
     reader_patch.assert_called_once_with("test", "parameters")
Esempio n. 4
0
def start_data_loading():
    """Run the load-and-process step once, or repeatedly at a fixed interval.

    Reads ``automatic_check_interval`` (int) and ``automatic_check`` (bool)
    from the ``datasource`` section. When automatic checking is disabled,
    ``_load_and_process_data`` runs a single time. When it is enabled, this
    function never returns: it runs ``_load_and_process_data`` in an endless
    loop, sleeping after each run for whatever is left of the interval
    (the value is handed to ``sleep``, so presumably seconds).
    """
    repeat_interval: int = get_int_property("datasource", "automatic_check_interval")

    if not get_bool_property("datasource", "automatic_check"):
        _load_and_process_data()
        return

    while True:
        cycle_started = time()
        _load_and_process_data()
        # Only wait for the part of the interval the run itself did not use up;
        # if the run took longer than the interval, start the next one at once.
        remaining = (cycle_started + repeat_interval) - time()
        if remaining > 0:
            sleep(remaining)