def test_search_break(self):
    """Check that ``search`` honours the ``should_break`` callback."""
    wiki = WikipediaAPI()

    # stop immediately: the callback answers True on every call
    always_stop = mock.Mock(return_value=True)
    corpus = wiki.search(
        'en',
        ['Clinton'],
        articles_per_query=2,
        should_break=always_stop,
    )
    self.assertEqual(len(corpus), 0)

    # stop inside recursion
    corpus = wiki.search(
        'en',
        ['Scarf'],
        articles_per_query=3,
        should_break=StopingMock(4),
    )
    self.assertEqual(len(corpus), 2)
def test_search_break(self):
    """Check that ``search`` honours the ``should_break`` callback.

    An immediately-stopped search must yield nothing, and a search that is
    interrupted part-way must yield fewer documents than an uninterrupted
    one for the same query.
    """
    wiki = WikipediaAPI()

    # stop immediately: the callback answers True on every call
    always_stop = mock.Mock(return_value=True)
    empty = wiki.search(
        'en',
        ['Clinton'],
        articles_per_query=2,
        should_break=always_stop,
    )
    self.assertEqual(len(empty), 0)

    # stop inside recursion: compare against the uninterrupted run
    full = wiki.search('en', ['Scarf'], articles_per_query=3)
    partial = wiki.search(
        'en',
        ['Scarf'],
        articles_per_query=3,
        should_break=StoppingMock(allow_calls=1),
    )
    self.assertLess(len(partial), len(full))
def test_search(self):
    """Check the shape of a basic search result and its progress reports.

    The search for a single query with ``articles_per_query=2`` is expected
    to return a ``Corpus`` of two documents with no attributes and seven
    metas, and to call ``on_progress`` twice with strictly increasing
    first arguments.
    """
    on_progress = mock.MagicMock()

    api = WikipediaAPI()
    result = api.search('en', ['Clinton'], articles_per_query=2,
                        on_progress=on_progress)

    # ``assertEqual`` is used instead of the ``assertEquals`` alias, which
    # is deprecated and no longer exists in Python 3.12+.
    self.assertIsInstance(result, Corpus)
    self.assertEqual(len(result.domain.attributes), 0)
    self.assertEqual(len(result.domain.metas), 7)
    self.assertEqual(len(result), 2)

    self.assertEqual(on_progress.call_count, 2)

    # The first positional argument of every call must grow monotonically.
    progress = 0
    for arg in on_progress.call_args_list:
        self.assertGreater(arg[0][0], progress)
        progress = arg[0][0]
class OWWikipedia(OWWidget):
    """Get articles from wikipedia.

    The widget collects a list of queries, a language and the number of
    articles per query, runs the search through ``WikipediaAPI`` and sends
    the result on the ``Corpus`` output.
    """
    name = '维基百科'
    priority = 160
    icon = 'icons/Wikipedia.svg'

    class Outputs:
        corpus = Output("Corpus", Corpus)

    want_main_area = False
    resizing_enabled = False

    # Column spans used in the query grid: labels first, then the editors.
    label_width = 1
    widgets_width = 2

    # Names of the string features the user can choose as text features.
    attributes = [feat.name for feat in WikipediaAPI.string_attributes]
    text_includes = settings.Setting(
        [feat.name for feat in WikipediaAPI.string_attributes])

    query_list = settings.Setting([])
    language = settings.Setting('en')
    articles_per_query = settings.Setting(10)

    # Template for the info label; formatted with the number of articles.
    info_label = '文章数量 {:d}'

    class Error(OWWidget.Error):
        api_error = Msg('API error: {}')

    class Warning(OWWidget.Warning):
        no_text_fields = Msg('未选择文字功能时,将推断文字功能')

    def __init__(self, *args, **kwargs):
        """Build the control area: query grid, feature list, info, button."""
        super().__init__(*args, **kwargs)

        # API errors are routed to the widget's error message.
        self.api = WikipediaAPI(on_error=self.Error.api_error)
        self.result = None

        query_box = gui.hBox(self.controlArea, '查询')

        # Queries configuration
        layout = QGridLayout()
        layout.setSpacing(7)

        row = 0
        self.query_edit = ListEdit(self, 'query_list',
                                   "每一行表示一个不同的查询", 100, self)
        layout.addWidget(QLabel('查询词:'), row, 0, 1, self.label_width)
        layout.addWidget(self.query_edit, row, self.label_width, 1,
                         self.widgets_width)

        # Language
        row += 1
        language_edit = ComboBox(self, 'language',
                                 tuple(sorted(lang2code.items())))
        layout.addWidget(QLabel('语言:'), row, 0, 1, self.label_width)
        layout.addWidget(language_edit, row, self.label_width, 1,
                         self.widgets_width)

        # Articles per query
        row += 1
        layout.addWidget(QLabel('每次查询文章数量:'), row, 0, 1,
                         self.label_width)
        slider = gui.valueSlider(query_box, self, 'articles_per_query',
                                 box='', values=[1, 3, 5, 10, 25])
        # Start in the column right after the label, like the other rows,
        # instead of a hard-coded column index.
        layout.addWidget(slider.box, row, self.label_width, 1,
                         self.widgets_width)

        query_box.layout().addLayout(layout)
        self.controlArea.layout().addWidget(query_box)

        self.controlArea.layout().addWidget(
            CheckListLayout('包含的内容', self, 'text_includes',
                            self.attributes, cols=2,
                            callback=self.set_text_features))

        self.info_box = gui.hBox(self.controlArea, '基本信息')
        self.result_label = gui.label(self.info_box, self,
                                      self.info_label.format(0))

        self.button_box = gui.hBox(self.controlArea)

        self.search_button = gui.button(self.button_box, self, '查询',
                                        self.start_stop)
        self.search_button.setFocusPolicy(Qt.NoFocus)

    def start_stop(self):
        """Stop the search if it is running, otherwise start it."""
        if self.search.running:
            self.search.stop()
        else:
            self.search()

    @asynchronous
    def search(self):
        """Query the API with the current settings and return its result."""
        return self.api.search(lang=self.language, queries=self.query_list,
                               articles_per_query=self.articles_per_query,
                               on_progress=self.progress_with_info,
                               should_break=self.search.should_break)

    @search.callback(should_raise=False)
    def progress_with_info(self, progress, n_retrieved):
        """Update the progress bar and the retrieved-articles label.

        ``progress`` is scaled by 100 before being passed to the progress
        bar; ``n_retrieved`` is formatted into ``info_label``.
        """
        self.progressBarSet(100 * progress, None)
        self.result_label.setText(self.info_label.format(n_retrieved))

    @search.on_start
    def on_start(self):
        """Reset errors, progress, labels and the output for a new search."""
        self.Error.api_error.clear()
        self.progressBarInit(None)
        self.search_button.setText('停止')
        self.result_label.setText(self.info_label.format(0))
        self.Outputs.corpus.send(None)

    @search.on_result
    def on_result(self, result):
        """Store the search result, refresh the UI and send the output."""
        self.result = result
        self.result_label.setText(
            self.info_label.format(len(result) if result else 0))
        self.search_button.setText('查询')
        self.set_text_features()
        self.progressBarFinished(None)

    def set_text_features(self):
        """Apply the selected text features to the result and send it.

        A warning is shown when no feature is selected; in that case
        ``None`` is passed to the result's ``set_text_features``.
        """
        self.Warning.no_text_fields.clear()
        if not self.text_includes:
            self.Warning.no_text_fields()

        if self.result is not None:
            vars_ = [
                var for var in self.result.domain.metas
                if var.name in self.text_includes
            ]
            self.result.set_text_features(vars_ or None)
            self.Outputs.corpus.send(self.result)

    def send_report(self):
        """Report language, queries and document count if a result exists."""
        if self.result:
            items = (('语言', code2lang[self.language]),
                     ('查询', self.query_edit.toPlainText()),
                     ('文档数量', len(self.result)))
            self.report_items('Query', items)
def test_network_errors(self):
    """The ``on_error`` callback must be invoked exactly once by the search.

    NOTE(review): presumably a network failure is simulated by a patch
    applied outside this view -- confirm against the enclosing test class.
    """
    error_handler = mock.MagicMock()

    wiki = WikipediaAPI(on_error=error_handler)
    wiki.search('en', ['Barack Obama'])

    self.assertEqual(error_handler.call_count, 1)
def test_search_disambiguation(self):
    """Search 'Scarf' and expect a Corpus with more than three documents."""
    wiki = WikipediaAPI()
    corpus = wiki.search(
        'en',
        ['Scarf'],
        articles_per_query=3,
    )

    self.assertIsInstance(corpus, Corpus)
    self.assertGreater(len(corpus), 3)
class OWWikipedia(OWWidget):
    """Get articles from wikipedia.

    The widget collects a list of queries, a language and the number of
    articles per query, runs the search through ``WikipediaAPI`` and sends
    the result on the ``Corpus`` output.
    """
    name = 'Wikipedia'
    priority = 160
    icon = 'icons/Wikipedia.svg'

    class Outputs:
        corpus = Output("Corpus", Corpus)

    want_main_area = False
    resizing_enabled = False

    # Column spans used in the query grid: labels first, then the editors.
    label_width = 1
    widgets_width = 2

    # Names of the string features the user can choose as text features.
    attributes = [feat.name for feat in WikipediaAPI.string_attributes]
    text_includes = settings.Setting(
        [feat.name for feat in WikipediaAPI.string_attributes])

    query_list = settings.Setting([])
    language = settings.Setting('en')
    articles_per_query = settings.Setting(10)

    # Template for the info label; formatted with the number of articles.
    info_label = 'Articles count {:d}'

    class Error(OWWidget.Error):
        api_error = Msg('API error: {}')

    class Warning(OWWidget.Warning):
        no_text_fields = Msg(
            'Text features are inferred when none are selected.')

    def __init__(self, *args, **kwargs):
        """Build the control area: query grid, feature list, info, button."""
        super().__init__(*args, **kwargs)

        # API errors are routed to the widget's error message.
        self.api = WikipediaAPI(on_error=self.Error.api_error)
        self.result = None

        query_box = gui.hBox(self.controlArea, 'Query')

        # Queries configuration
        layout = QGridLayout()
        layout.setSpacing(7)

        row = 0
        self.query_edit = ListEdit(self, 'query_list',
                                   "Each line represents a separate query.",
                                   100, self)
        layout.addWidget(QLabel('Query word list:'), row, 0, 1,
                         self.label_width)
        layout.addWidget(self.query_edit, row, self.label_width, 1,
                         self.widgets_width)

        # Language
        row += 1
        language_edit = ComboBox(self, 'language',
                                 tuple(sorted(lang2code.items())))
        layout.addWidget(QLabel('Language:'), row, 0, 1, self.label_width)
        layout.addWidget(language_edit, row, self.label_width, 1,
                         self.widgets_width)

        # Articles per query
        row += 1
        layout.addWidget(QLabel('Articles per query:'), row, 0, 1,
                         self.label_width)
        slider = gui.valueSlider(query_box, self, 'articles_per_query',
                                 box='', values=[1, 3, 5, 10, 25])
        # Start in the column right after the label, like the other rows,
        # instead of a hard-coded column index.
        layout.addWidget(slider.box, row, self.label_width, 1,
                         self.widgets_width)

        query_box.layout().addLayout(layout)
        self.controlArea.layout().addWidget(query_box)

        self.controlArea.layout().addWidget(
            CheckListLayout('Text includes', self, 'text_includes',
                            self.attributes, cols=2,
                            callback=self.set_text_features))

        self.info_box = gui.hBox(self.controlArea, 'Info')
        self.result_label = gui.label(self.info_box, self,
                                      self.info_label.format(0))

        self.button_box = gui.hBox(self.controlArea)

        self.search_button = gui.button(self.button_box, self, 'Search',
                                        self.start_stop)
        self.search_button.setFocusPolicy(Qt.NoFocus)

    def start_stop(self):
        """Stop the search if it is running, otherwise start it."""
        if self.search.running:
            self.search.stop()
        else:
            self.search()

    @asynchronous
    def search(self):
        """Query the API with the current settings and return its result."""
        return self.api.search(lang=self.language, queries=self.query_list,
                               articles_per_query=self.articles_per_query,
                               on_progress=self.progress_with_info,
                               should_break=self.search.should_break)

    @search.callback(should_raise=False)
    def progress_with_info(self, progress, n_retrieved):
        """Update the progress bar and the retrieved-articles label.

        ``progress`` is scaled by 100 before being passed to the progress
        bar; ``n_retrieved`` is formatted into ``info_label``.
        """
        self.progressBarSet(100 * progress, None)
        self.result_label.setText(self.info_label.format(n_retrieved))

    @search.on_start
    def on_start(self):
        """Reset errors, progress, labels and the output for a new search."""
        self.Error.api_error.clear()
        self.progressBarInit(None)
        self.search_button.setText('Stop')
        self.result_label.setText(self.info_label.format(0))
        self.Outputs.corpus.send(None)

    @search.on_result
    def on_result(self, result):
        """Store the search result, refresh the UI and send the output."""
        self.result = result
        self.result_label.setText(
            self.info_label.format(len(result) if result else 0))
        self.search_button.setText('Search')
        self.set_text_features()
        self.progressBarFinished(None)

    def set_text_features(self):
        """Apply the selected text features to the result and send it.

        A warning is shown when no feature is selected; in that case
        ``None`` is passed to the result's ``set_text_features``.
        """
        self.Warning.no_text_fields.clear()
        if not self.text_includes:
            self.Warning.no_text_fields()

        if self.result is not None:
            vars_ = [var for var in self.result.domain.metas
                     if var.name in self.text_includes]
            self.result.set_text_features(vars_ or None)
            self.Outputs.corpus.send(self.result)

    def send_report(self):
        """Report language, queries and article count if a result exists."""
        if self.result:
            items = (('Language', code2lang[self.language]),
                     ('Query', self.query_edit.toPlainText()),
                     ('Articles count', len(self.result)))
            self.report_items('Query', items)
class OWWikipedia(OWConcurrentWidget):
    """Get articles from wikipedia.

    The widget collects a list of queries, a language and the number of
    articles per query, runs the search through ``WikipediaAPI`` and sends
    the result on the ``IO.CORPUS`` output.
    """
    name = "Wikipedia"
    priority = 27
    icon = "icons/Wikipedia.svg"

    outputs = [(IO.CORPUS, Corpus)]
    want_main_area = False
    resizing_enabled = False

    # Column spans used in the query grid: labels first, then the editors.
    label_width = 1
    widgets_width = 2

    # Names of the string features the user can choose as text features.
    attributes = [feat.name for feat in WikipediaAPI.string_attributes]
    text_includes = settings.Setting(
        [feat.name for feat in WikipediaAPI.string_attributes])

    query_list = settings.Setting([])
    language = settings.Setting("en")
    articles_per_query = settings.Setting(10)

    # Template for the info label; formatted with the number of articles.
    info_label = "Articles count {:d}"

    class Error(OWWidget.Error):
        api_error = Msg("API error: {}")

    class Warning(OWWidget.Warning):
        no_text_fields = Msg(
            "Text features are inferred when none are selected.")

    def __init__(self, *args, **kwargs):
        """Build the control area: query grid, feature list, info, buttons."""
        super().__init__(*args, **kwargs)

        # API errors are routed to the widget's error message.
        self.api = WikipediaAPI(on_error=self.Error.api_error)
        self.result = None

        query_box = gui.hBox(self.controlArea, "Query")

        # Queries configuration
        layout = QGridLayout()
        layout.setSpacing(7)

        row = 0
        query_edit = ListEdit(self, "query_list",
                              "Each line represents a separate query.",
                              None, self)
        layout.addWidget(QLabel("Query word list:"), row, 0, 1,
                         self.label_width)
        layout.addWidget(query_edit, row, self.label_width, 1,
                         self.widgets_width)

        # Language
        row += 1
        language_edit = ComboBox(self, "language",
                                 tuple(sorted(lang2code.items())))
        layout.addWidget(QLabel("Language:"), row, 0, 1, self.label_width)
        layout.addWidget(language_edit, row, self.label_width, 1,
                         self.widgets_width)

        # Articles per query
        row += 1
        layout.addWidget(QLabel("Articles per query:"), row, 0, 1,
                         self.label_width)
        slider = gui.valueSlider(query_box, self, "articles_per_query",
                                 box="", values=[1, 3, 5, 10, 25])
        # Start in the column right after the label, like the other rows,
        # instead of a hard-coded column index.
        layout.addWidget(slider.box, row, self.label_width, 1,
                         self.widgets_width)

        query_box.layout().addLayout(layout)
        self.controlArea.layout().addWidget(query_box)

        self.controlArea.layout().addWidget(
            CheckListLayout(
                "Text includes", self, "text_includes", self.attributes,
                cols=2, callback=self.set_text_features
            )
        )

        self.info_box = gui.hBox(self.controlArea, "Info")
        self.result_label = gui.label(self.info_box, self,
                                      self.info_label.format(0))

        self.button_box = gui.hBox(self.controlArea)
        self.button_box.layout().addWidget(self.report_button)

        self.search_button = gui.button(self.button_box, self, "Search",
                                        self.start_stop)
        self.search_button.setFocusPolicy(Qt.NoFocus)

    def start_stop(self):
        """Stop the search if it is running, otherwise start it."""
        if self.running:
            self.stop()
        else:
            self.search()

    @asynchronous(allow_partial_results=True)
    def search(self, on_progress, should_break):
        """Query the API with the current settings and return its result.

        ``on_progress`` and ``should_break`` are callables forwarded to the
        API (presumably supplied by the ``asynchronous`` decorator --
        confirm against its implementation).
        """
        def progress_with_info(progress, n_retrieved):
            # Scale progress by 100 for ``on_progress`` and show how many
            # articles were retrieved so far.
            on_progress(100 * progress)
            self.result_label.setText(self.info_label.format(n_retrieved))

        return self.api.search(
            lang=self.language,
            queries=self.query_list,
            articles_per_query=self.articles_per_query,
            on_progress=progress_with_info,
            should_break=should_break,
        )

    def on_start(self):
        """Reset errors, labels and the output for a new search."""
        self.Error.api_error.clear()
        self.search_button.setText("Stop")
        self.result_label.setText(self.info_label.format(0))
        self.send(IO.CORPUS, None)

    def on_result(self, result):
        """Store the search result, refresh the UI and send the output."""
        self.result = result
        self.result_label.setText(
            self.info_label.format(len(result) if result else 0))
        self.search_button.setText("Search")
        self.set_text_features()

    def set_text_features(self):
        """Apply the selected text features to the result and send it.

        A warning is shown when no feature is selected; in that case
        ``None`` is passed to the result's ``set_text_features``.
        """
        self.Warning.no_text_fields.clear()
        if not self.text_includes:
            self.Warning.no_text_fields()

        if self.result is not None:
            vars_ = [var for var in self.result.domain.metas
                     if var.name in self.text_includes]
            self.result.set_text_features(vars_ or None)
            self.send(IO.CORPUS, self.result)

    def send_report(self):
        """Report language, queries and article count if a result exists."""
        if self.result:
            items = (
                ("Language", code2lang[self.language]),
                ("Query", self.query_list),
                ("Articles count", len(self.result)),
            )
            self.report_items("Query", items)
def test_search_disambiguation(self):
    """Search 'Scarf' and expect a Corpus with at least three documents."""
    wiki = WikipediaAPI()
    corpus = wiki.search(
        'en',
        ['Scarf'],
        articles_per_query=3,
    )

    self.assertIsInstance(corpus, Corpus)
    self.assertGreaterEqual(len(corpus), 3)