def test_run_block_with_large_work_mem(self):
    """Test that run_block_with_large_work_mem() temporarily raises work_mem.

    Sets a small session work_mem, then verifies that a statement executed
    inside run_block_with_large_work_mem() sees the configured
    "large_work_mem" value while statements outside the block see the
    normal value again.
    """
    normal_work_mem = 256  # MB
    large_work_mem = 512  # MB

    old_large_work_mem = None

    config = py_get_config()
    if 'large_work_mem' in config['mediawords']:
        old_large_work_mem = config['mediawords']['large_work_mem']

    config['mediawords']['large_work_mem'] = '%dMB' % large_work_mem
    py_set_config(config)

    try:
        self.db().query("SET work_mem TO %s", ('%sMB' % normal_work_mem,))

        current_work_mem = int(self.db().query("""
            SELECT setting::INT FROM pg_settings WHERE name = 'work_mem'
        """).flat()[0])
        # pg_settings reports work_mem in kB, hence the * 1024
        assert current_work_mem == normal_work_mem * 1024

        def __test_run_block_with_large_work_mem_inner():
            # Record the work_mem that is in effect inside the block
            self.db().execute_with_large_work_mem("""
                INSERT INTO execute_large_work_mem (work_mem)
                SELECT setting::INT FROM pg_settings WHERE name = 'work_mem'
            """)

        self.db().query(
            'CREATE TEMPORARY TABLE execute_large_work_mem (work_mem INT NOT NULL)'
        )
        self.db().run_block_with_large_work_mem(
            __test_run_block_with_large_work_mem_inner
        )

        statement_work_mem = int(self.db().query("""
            SELECT work_mem FROM execute_large_work_mem
        """).flat()[0])
        assert statement_work_mem == large_work_mem * 1024

        # Outside of the block, work_mem should be back to normal
        current_work_mem = int(self.db().query("""
            SELECT setting::INT FROM pg_settings WHERE name = 'work_mem'
        """).flat()[0])
        assert current_work_mem == normal_work_mem * 1024

    finally:
        # Restore the original configuration even if an assertion fails.
        # If the key wasn't set before, remove it instead of leaving it
        # set to None (which the original code did).
        if old_large_work_mem is None:
            config['mediawords'].pop('large_work_mem', None)
        else:
            config['mediawords']['large_work_mem'] = old_large_work_mem
        py_set_config(config)
def test_run_block_with_large_work_mem(self):
    """Test that run_block_with_large_work_mem() temporarily raises work_mem.

    After lowering the session work_mem, statements run through
    run_block_with_large_work_mem() must observe the "large_work_mem"
    configuration value; statements outside the block must not.
    """
    normal_work_mem = 256  # MB
    large_work_mem = 512  # MB

    old_large_work_mem = None

    config = py_get_config()
    if 'large_work_mem' in config['mediawords']:
        old_large_work_mem = config['mediawords']['large_work_mem']

    config['mediawords']['large_work_mem'] = '%dMB' % large_work_mem
    py_set_config(config)

    def __current_work_mem_kb() -> int:
        # pg_settings reports work_mem in kB
        return int(self.db().query("""
            SELECT setting::INT FROM pg_settings WHERE name = 'work_mem'
        """).flat()[0])

    try:
        self.db().query("SET work_mem TO %s", ('%sMB' % normal_work_mem,))
        assert __current_work_mem_kb() == normal_work_mem * 1024

        def __test_run_block_with_large_work_mem_inner():
            # Record the work_mem in effect inside the block
            self.db().execute_with_large_work_mem("""
                INSERT INTO execute_large_work_mem (work_mem)
                SELECT setting::INT FROM pg_settings WHERE name = 'work_mem'
            """)

        self.db().query(
            'CREATE TEMPORARY TABLE execute_large_work_mem (work_mem INT NOT NULL)'
        )
        self.db().run_block_with_large_work_mem(
            __test_run_block_with_large_work_mem_inner
        )

        statement_work_mem = int(self.db().query("""
            SELECT work_mem FROM execute_large_work_mem
        """).flat()[0])
        assert statement_work_mem == large_work_mem * 1024

        # Outside of the block, work_mem is back to normal
        assert __current_work_mem_kb() == normal_work_mem * 1024

    finally:
        # Restore the original configuration even on assertion failure;
        # delete the key if it was absent originally instead of storing
        # None in it (the original code's restore left None behind).
        if old_large_work_mem is None:
            config['mediawords'].pop('large_work_mem', None)
        else:
            config['mediawords']['large_work_mem'] = old_large_work_mem
        py_set_config(config)
def test_nyt_labels_annotator(self):
    """Test NYTLabels annotation end-to-end against a mock HTTP annotator.

    Creates a test medium / story / sentence, serves a canned annotator
    response via HashServer, points the 'nytlabels' configuration at it,
    then verifies that the raw annotation is stored and the expected tags
    are attached to the story.
    """
    media = self.db().create(table='media', insert_hash={
        'name': "test medium",
        'url': "url://test/medium",
    })

    story = self.db().create(table='stories', insert_hash={
        'media_id': media['media_id'],
        'url': 'url://story/a',
        'guid': 'guid://story/a',
        'title': 'story a',
        'description': 'description a',
        'publish_date': sql_now(),
        'collect_date': sql_now(),
        'full_text_rss': True,
    })
    stories_id = story['stories_id']

    self.db().create(table='story_sentences', insert_hash={
        'stories_id': stories_id,
        'sentence_number': 1,
        'sentence': 'I hope that the CLIFF annotator is working.',
        'media_id': media['media_id'],
        'publish_date': sql_now(),
        'language': 'en'
    })

    def __nyt_labels_sample_response(_: HashServer.Request) -> Union[str, bytes]:
        """Mock annotator."""
        response = ""
        response += "HTTP/1.0 200 OK\r\n"
        response += "Content-Type: application/json; charset=UTF-8\r\n"
        response += "\r\n"
        response += encode_json(self.__sample_nyt_labels_response())
        return response

    pages = {
        '/predict.json': {
            'callback': __nyt_labels_sample_response,
        }
    }

    port = random_unused_port()
    annotator_url = 'http://localhost:%d/predict.json' % port

    hs = HashServer(port=port, pages=pages)
    hs.start()

    # Inject NYTLabels credentials into configuration
    config = py_get_config()
    new_config = copy.deepcopy(config)
    new_config['nytlabels'] = {
        'enabled': True,
        'annotator_url': annotator_url,
    }
    py_set_config(new_config)

    try:
        nytlabels = NYTLabelsAnnotator()
        nytlabels.annotate_and_store_for_story(db=self.db(), stories_id=stories_id)
        nytlabels.update_tags_for_story(db=self.db(), stories_id=stories_id)
    finally:
        # Always stop the mock server and restore the configuration, even
        # if annotation raises, so other tests don't see leaked state.
        hs.stop()
        py_set_config(config)

    annotation_exists = self.db().query("""
        SELECT 1
        FROM nytlabels_annotations
        WHERE object_id = %(object_id)s
    """, {'object_id': stories_id}).hash()
    assert annotation_exists is not None

    story_tags = self.db().query("""
        SELECT
            tags.tag AS tags_name,
            tags.label AS tags_label,
            tags.description AS tags_description,
            tag_sets.name AS tag_sets_name,
            tag_sets.label AS tag_sets_label,
            tag_sets.description AS tag_sets_description
        FROM stories_tags_map
            INNER JOIN tags ON stories_tags_map.tags_id = tags.tags_id
            INNER JOIN tag_sets ON tags.tag_sets_id = tag_sets.tag_sets_id
        WHERE stories_tags_map.stories_id = %(stories_id)s
        ORDER BY tags.tag COLLATE "C", tag_sets.name COLLATE "C"
    """, {'stories_id': stories_id}).hashes()

    expected_tags = self.__expected_tags()
    assert story_tags == expected_tags
def test_nyt_labels_annotator(self):
    """Test NYTLabels annotation end-to-end against a mock HTTP annotator.

    Builds a test medium, story and sentence, stands up a HashServer that
    replies with a canned annotator response, injects its URL into the
    'nytlabels' configuration, and asserts that the annotation is stored
    and the story receives the expected tags.
    """
    media = self.db().create(table='media', insert_hash={
        'name': "test medium",
        'url': "url://test/medium",
    })

    story = self.db().create(table='stories', insert_hash={
        'media_id': media['media_id'],
        'url': 'url://story/a',
        'guid': 'guid://story/a',
        'title': 'story a',
        'description': 'description a',
        'publish_date': sql_now(),
        'collect_date': sql_now(),
        'full_text_rss': True,
    })
    stories_id = story['stories_id']

    self.db().create(table='story_sentences', insert_hash={
        'stories_id': stories_id,
        'sentence_number': 1,
        'sentence': 'I hope that the CLIFF annotator is working.',
        'media_id': media['media_id'],
        'publish_date': sql_now(),
        'language': 'en'
    })

    def __nyt_labels_sample_response(_: HashServer.Request) -> Union[str, bytes]:
        """Mock annotator."""
        response = ""
        response += "HTTP/1.0 200 OK\r\n"
        response += "Content-Type: application/json; charset=UTF-8\r\n"
        response += "\r\n"
        response += encode_json(self.__sample_nyt_labels_response())
        return response

    pages = {
        '/predict.json': {
            'callback': __nyt_labels_sample_response,
        }
    }

    port = random_unused_port()
    annotator_url = 'http://localhost:%d/predict.json' % port

    hs = HashServer(port=port, pages=pages)
    hs.start()

    # Inject NYTLabels credentials into configuration
    config = py_get_config()
    new_config = copy.deepcopy(config)
    new_config['nytlabels'] = {
        'enabled': True,
        'annotator_url': annotator_url,
    }
    py_set_config(new_config)

    try:
        nytlabels = NYTLabelsAnnotator()
        nytlabels.annotate_and_store_for_story(db=self.db(), stories_id=stories_id)
        nytlabels.update_tags_for_story(db=self.db(), stories_id=stories_id)
    finally:
        # Stop the mock server and reset the configuration even when
        # annotation raises; otherwise later tests inherit leaked state.
        hs.stop()
        py_set_config(config)

    annotation_exists = self.db().query("""
        SELECT 1
        FROM nytlabels_annotations
        WHERE object_id = %(object_id)s
    """, {'object_id': stories_id}).hash()
    assert annotation_exists is not None

    story_tags = self.db().query("""
        SELECT
            tags.tag AS tags_name,
            tags.label AS tags_label,
            tags.description AS tags_description,
            tag_sets.name AS tag_sets_name,
            tag_sets.label AS tag_sets_label,
            tag_sets.description AS tag_sets_description
        FROM stories_tags_map
            INNER JOIN tags ON stories_tags_map.tags_id = tags.tags_id
            INNER JOIN tag_sets ON tags.tag_sets_id = tag_sets.tag_sets_id
        WHERE stories_tags_map.stories_id = %(stories_id)s
        ORDER BY tags.tag COLLATE "C", tag_sets.name COLLATE "C"
    """, {'stories_id': stories_id}).hashes()

    expected_tags = self.__expected_tags()
    assert story_tags == expected_tags