Esempio n. 1
0
def recreate_index(name, mapping, rebuild=False, delete_old=True):
    """Recreate an ElasticSearch index.

    Without ``rebuild`` both ``<name>_v1`` and ``<name>_v2`` are deleted
    (a missing index is ignored), ``<name>_v1`` is created from ``mapping``
    and the alias ``name`` is put on it.

    With ``rebuild`` the index found behind ``name`` is copied into its
    sibling (``_v1`` <-> ``_v2``) through the ``_reindex`` endpoint. While
    copying, the source index is made read-only and refreshes are disabled
    on the target; both settings are reverted afterwards, whether or not
    the copy succeeded. The alias is put on the new index only when the
    document counts of both indices match.

    :param name: alias name; the concrete indices are ``name + '_v1'`` and
        ``name + '_v2'``.
    :param mapping: body passed to ``es.indices.create`` for the new index.
    :param rebuild: copy the existing documents instead of starting empty.
    :param delete_old: when rebuilding, delete the previous index once the
        alias has been put on the new one.
    :returns: ``False`` when the new index does not hold the same number of
        documents as the old one after reindexing, ``True`` otherwise.
    :raises AssertionError: when the reindex request is not answered with
        status 200 and ``acknowledged``.
    """
    if not rebuild:
        es.indices.delete(index=name + "_v1", ignore=404)
        es.indices.delete(index=name + "_v2", ignore=404)
        es.indices.create(index=name + "_v1", body=mapping)
        es.indices.put_alias(index=name + "_v1", name=name)
        return True

    from copy import deepcopy
    original_es_hosts = deepcopy(es.transport.hosts)
    try:
        # The reindexing plugin can work only with one client.
        es.transport.hosts = es.transport.hosts[:1]
        es.transport.set_connections(es.transport.hosts)
        # list(...)[0] rather than .keys()[0]: dict views cannot be indexed
        # on Python 3, while this form behaves the same on Python 2.
        current_index = list(es.indices.get_alias(name))[0]
        future_index = name + '_v2' if current_index.endswith('_v1') else name + '_v1'
        original_number_of_documents = es.count(current_index)['count']
        es.indices.delete(index=future_index, ignore=404)
        es.indices.create(index=future_index, body=mapping)
        try:
            # The temporary settings are applied inside the try so that the
            # finally clause reverts them even when one of these two calls
            # fails; otherwise the current index could stay read-only.
            es.indices.put_settings(index=current_index, body={'index': {'blocks': {'read_only': True}}})
            es.indices.put_settings(index=future_index, body={'index': {'refresh_interval': -1}})
            code, answer = es.cat.transport.perform_request('POST', '/{}/_reindex/{}/'.format(current_index, future_index))
            # Raised explicitly (instead of using ``assert``) so the check is
            # not stripped when Python runs with -O.
            if code != 200 or not answer['acknowledged']:
                raise AssertionError(
                    "Reindexing {} into {} was not acknowledged (status {})".format(
                        current_index, future_index, code))
            reindex_name = answer['name']
            # NOTE(review): this poll has no timeout; it loops for as long as
            # the reindex job is listed by the endpoint.
            while reindex_name in es.cat.transport.perform_request('GET', '/_reindex/')[1]['names']:
                # Let's poll to wait for finishing
                sleep(3)
            es.indices.flush(future_index, wait_if_ongoing=True)
            if original_number_of_documents != es.count(future_index)['count']:
                click.echo("ERROR when reindexing {current_index} into {future_index}. Bailing out.".format(
                    current_index=current_index,
                    future_index=future_index))
                return False
        finally:
            es.indices.put_settings(index=current_index, body={'index': {'blocks': {'read_only': False}}})
            es.indices.put_settings(index=future_index, body={'index': {'refresh_interval': "1s"}})
            es.indices.forcemerge(index=future_index, max_num_segments=5)

        # NOTE(review): put_alias is not accompanied by a removal of the alias
        # from the old index; with delete_old=False the alias presumably ends
        # up on both indices - confirm this is intended.
        es.indices.put_alias(index=future_index, name=name)
        if delete_old:
            es.indices.delete(index=current_index)
    finally:
        # We restore all the correct connections
        es.transport.hosts = original_es_hosts
        es.transport.set_connections(es.transport.hosts)
    return True
 def test_reindexing(self):
     """Test simple reindexing of HEP.

     Rebuilds the index behind ``self.name`` and checks that the alias now
     resolves to ``self.future_index`` and that this index holds
     ``self.current_count`` documents.
     """
     from invenio_ext.es import es
     from inspirehep.manage import recreate_index
     # NOTE: currently, on Travis we have to disable the read_only functionality
     # since it seems to not work properly in that context.
     # assertTrue instead of the deprecated ``assert_`` alias.
     self.assertTrue(recreate_index(self.name, self.mapping, rebuild=True, delete_old=True))
     # list(...)[0] rather than .keys()[0]: dict views cannot be indexed on
     # Python 3, while this form behaves the same on Python 2.
     self.assertEqual(list(es.indices.get_alias(self.name))[0], self.future_index)
     self.assertEqual(es.count(self.future_index)['count'], self.current_count)
 def setUp(self):
     """Load the ``hep`` mapping and record the current index state.

     Stores the index the alias currently resolves to, the sibling index
     name (``_v1`` <-> ``_v2``) and the current document count.
     """
     from invenio_ext.es import es
     from invenio_search.registry import mappings
     self.name = 'hep'
     self.mapping_filename = self.name + ".json"
     # Context manager so the mapping file is closed as soon as it is parsed.
     with open(mappings[self.mapping_filename], "r") as mapping_file:
         self.mapping = json.load(mapping_file)
     # list(...)[0] rather than .keys()[0]: dict views cannot be indexed on
     # Python 3, while this form behaves the same on Python 2.
     self.current_index = list(es.indices.get_alias(self.name))[0]
     self.future_index = self.name + '_v2' if self.current_index.endswith('_v1') else self.name + '_v1'
     self.current_count = es.count(self.current_index)['count']