def recreate_index(name, mapping, rebuild=False, delete_old=True):
    """Recreate the ElasticSearch index served under the alias ``name``.

    Two concrete indices, ``<name>_v1`` and ``<name>_v2``, are used in
    alternation, and ``name`` is an alias put on the live one.

    :param name: alias name, also used as prefix of the concrete indices.
    :param mapping: body handed to ``es.indices.create`` for the new index.
    :param rebuild: if false, both versioned indices are deleted and an
        empty ``<name>_v1`` is created and aliased. If true, the documents
        of the index currently behind the alias are copied into the other
        versioned index through the ``_reindex`` endpoint, and the alias is
        then put on that new index.
    :param delete_old: only used with ``rebuild``; delete the previously
        live index once the alias has been put on the new one.
    :returns: ``True`` on success, ``False`` if after reindexing the new
        index does not hold as many documents as the original one.
    :raises RuntimeError: if the reindex request does not answer with
        status 200 and ``acknowledged`` set.
    """
    if not rebuild:
        # Start from scratch: drop whichever versions exist (a missing
        # index is not an error) and expose a fresh, empty ``_v1``.
        es.indices.delete(index=name + "_v1", ignore=404)
        es.indices.delete(index=name + "_v2", ignore=404)
        es.indices.create(index=name + "_v1", body=mapping)
        es.indices.put_alias(index=name + "_v1", name=name)
        return True

    from copy import deepcopy

    original_es_hosts = deepcopy(es.transport.hosts)
    try:
        # The reindexing plugin can work only with one client.
        es.transport.hosts = es.transport.hosts[:1]
        es.transport.set_connections(es.transport.hosts)

        # ``next(iter(...))`` rather than ``.keys()[0]``: dict views cannot
        # be subscripted on Python 3, while this form works on 2 and 3.
        current_index = next(iter(es.indices.get_alias(name)))
        if current_index.endswith('_v1'):
            future_index = name + '_v2'
        else:
            future_index = name + '_v1'
        original_number_of_documents = es.count(current_index)['count']

        es.indices.delete(index=future_index, ignore=404)
        es.indices.create(index=future_index, body=mapping)
        # Block the source index and switch off periodic refresh on the
        # target for the duration of the copy; both settings are reverted
        # in the ``finally`` below.
        es.indices.put_settings(
            index=current_index,
            body={'index': {'blocks': {'read_only': True}}})
        es.indices.put_settings(
            index=future_index,
            body={'index': {'refresh_interval': -1}})
        try:
            code, answer = es.cat.transport.perform_request(
                'POST',
                '/{}/_reindex/{}/'.format(current_index, future_index))
            # Explicit check instead of ``assert``: assertions are removed
            # when Python runs with -O, which would hide a failed request.
            if code != 200 or not answer['acknowledged']:
                raise RuntimeError(
                    "Reindex request from {} into {} was not accepted "
                    "(status {}).".format(current_index, future_index, code))
            reindex_name = answer['name']
            # NOTE(review): there is no timeout; this loops for as long as
            # the task name is still listed by the ``/_reindex/`` endpoint.
            while reindex_name in es.cat.transport.perform_request(
                    'GET', '/_reindex/')[1]['names']:
                # Let's poll to wait for finishing
                sleep(3)
            es.indices.flush(future_index, wait_if_ongoing=True)
            if original_number_of_documents != es.count(future_index)['count']:
                click.echo(
                    "ERROR when reindexing {current_index} into {future_index}. Bailing out.".format(
                        current_index=current_index,
                        future_index=future_index))
                return False
        finally:
            es.indices.put_settings(
                index=current_index,
                body={'index': {'blocks': {'read_only': False}}})
            es.indices.put_settings(
                index=future_index,
                body={'index': {'refresh_interval': "1s"}})

        es.indices.forcemerge(index=future_index, max_num_segments=5)
        # NOTE(review): the alias is only added to the new index here; with
        # delete_old=False the old index presumably keeps it too - confirm.
        es.indices.put_alias(index=future_index, name=name)
        if delete_old:
            es.indices.delete(index=current_index)
    finally:
        # We restore all the correct connections
        es.transport.hosts = original_es_hosts
        es.transport.set_connections(es.transport.hosts)
    return True
def test_reindexing(self):
    """Test simple reindexing of HEP.

    Rebuilds the index through ``recreate_index`` and checks that the alias
    now resolves to the index name precomputed in ``self.future_index`` and
    that this index holds ``self.current_count`` documents.
    """
    from invenio_ext.es import es
    from inspirehep.manage import recreate_index

    # NOTE: currently, on Travis we have to disable the read_only functionality
    # since it seems to not work properly in that context.

    # ``assertTrue`` replaces the deprecated ``assert_`` alias, which no
    # longer exists on recent Python versions.
    self.assertTrue(
        recreate_index(self.name, self.mapping, rebuild=True, delete_old=True))
    # ``next(iter(...))`` instead of ``.keys()[0]``, which fails on Python 3.
    self.assertEqual(
        next(iter(es.indices.get_alias(self.name))), self.future_index)
    self.assertEqual(
        es.count(self.future_index)['count'], self.current_count)
def setUp(self):
    """Record the state of the ``hep`` index before the test runs.

    Loads the mapping registered under ``hep.json``, looks up the concrete
    index currently behind the ``hep`` alias, derives the name of the other
    versioned index (``_v1`` <-> ``_v2``) and stores the current document
    count.
    """
    from invenio_ext.es import es
    from invenio_search.registry import mappings

    self.name = 'hep'
    self.mapping_filename = self.name + ".json"
    # Context manager so the mapping file is closed as soon as it is parsed
    # instead of being left to the garbage collector.
    with open(mappings[self.mapping_filename], "r") as mapping_file:
        self.mapping = json.load(mapping_file)

    # ``next(iter(...))`` instead of ``.keys()[0]``: dict views cannot be
    # subscripted on Python 3.
    self.current_index = next(iter(es.indices.get_alias(self.name)))
    if self.current_index.endswith('_v1'):
        self.future_index = self.name + '_v2'
    else:
        self.future_index = self.name + '_v1'
    self.current_count = es.count(self.current_index)['count']