Beispiel #1
1
def reindex(src, dest):
    """Copy all documents from index ``src`` into ``dest``, then delete ``src``.

    The Elasticsearch client is taken from a freshly set-up scrapi
    ``DatabaseManager``. Imports are local to the function.
    """
    from elasticsearch import helpers as es_helpers
    from scrapi.processing.elasticsearch import DatabaseManager

    manager = DatabaseManager()
    manager.setup()
    client = manager.es

    es_helpers.reindex(client, src, dest)
    # The source index is dropped once the copy has returned.
    client.indices.delete(src)
    def reindex(self):
        """Copy the source index into a newly created target index and alias it.

        Order of operations: create the target index from the configured
        body, copy the documents with ``helpers.reindex``, delete the source
        index, then add the configured alias to the target index.
        """
        es = Elasticsearch([{"host": self.__host, "port": self.__port}])
        indices = IndicesClient(es)

        # The target index is created with its mapping body before any
        # document is copied into it.
        indices.create(index=self.__target_index, body=self.__body)

        helpers.reindex(
            client=es,
            source_index=self.__source_index,
            target_index=self.__target_index,
        )

        # Only an "add" action is sent; no "remove" action is issued.
        alias_request = {
            'actions': [
                {
                    "add": {
                        "index": self.__target_index,
                        "alias": self.__alias,
                    }
                },
            ]
        }

        # The source index is deleted before the alias request is sent.
        indices.delete(index=self.__source_index)
        indices.update_aliases(body=alias_request)
 def run(self, index_name=None):
     """Rebuild an Elasticsearch index by cloning it and re-pointing an alias.

     A clone index named ``<index_name>-<random string>`` is created and
     mapped, the documents are reindexed into it, the original index is
     deleted, and an alias with the original name is put on the clone's
     real index. The temporary clone-name alias is removed at the end.

     :param index_name: index to rebuild; falls back to the configured
         ``ELASTICSEARCH_INDEX`` when empty.
     :raises Exception: if the index is not among the available indexes.
     """
     # if no index name is passed then use the configured one
     index_name = index_name if index_name else superdesk.app.config['ELASTICSEARCH_INDEX']
     print('Starting index rebuilding for index: {}'.format(index_name))
     if index_name not in self._get_available_indexes():
         raise Exception("Index {} is not configured".format(index_name))
     try:
         es = get_es(superdesk.app.config['ELASTICSEARCH_URL'])
         clone_name = index_name + '-' + get_random_string()
         print('Creating index: ', clone_name)
         superdesk.app.data.elastic.create_index(clone_name, superdesk.app.config['ELASTICSEARCH_SETTINGS'])
         real_name = superdesk.app.data.elastic.get_index_by_alias(clone_name)
         print('Putting mapping for index: ', clone_name)
         superdesk.app.data.elastic.put_mapping(superdesk.app, clone_name)
         print('Starting index rebuilding.')
         reindex(es, index_name, clone_name)
         print('Finished index rebuilding.')
         print('Deleting index: ', index_name)
         get_indices(es).delete(index_name)
         print('Creating alias: ', index_name)
         get_indices(es).put_alias(index=real_name, name=index_name)
         print('Alias created.')
         print('Deleting clone name alias')
         get_indices(es).delete_alias(name=clone_name, index=real_name)
         print('Deleted clone name alias')
     except elasticsearch.exceptions.NotFoundError as nfe:
         # The rebuild did not complete; report the error only.
         print(nfe)
     else:
         # Success is reported only when no step raised NotFoundError.
         print('Index {0} rebuilt successfully.'.format(index_name))
    def test_children_are_reindexed_correctly(self):
        """Reindex 'test-index' into 'real-index' and check both documents.

        The question (id 42) is read from 'real-index'; the answer (id 47,
        parent 42) is read from 'test-index'.
        """
        helpers.reindex(self.client, 'test-index', 'real-index')

        question = self.client.get(index='real-index', doc_type='question', id=42)
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            {
                '_id': '42',
                '_index': 'real-index',
                '_source': {},
                '_type': 'question',
                '_version': 1,
                'found': True
            }, question)

        answer = self.client.get(index='test-index',
                                 doc_type='answer',
                                 id=47,
                                 parent=42)
        # '_routing' is optional in the response; when present it is checked
        # and removed so the dict comparison below stays exact.
        if '_routing' in answer:
            self.assertEqual(answer.pop('_routing'), '42')
        self.assertEqual(
            {
                '_id': '47',
                '_index': 'test-index',
                '_source': {
                    'some': 'data'
                },
                '_type': 'answer',
                '_version': 1,
                '_parent': '42',
                'found': True
            }, answer)
    def test_reindex_accepts_a_query(self):
        """Reindex with a query that keeps only documents of type "answers".

        Afterwards "prod_index" must exist, must not contain the "questions"
        type, and must hold 50 answers, among them document 42.
        """
        answers_only = {
            "query": {
                "filtered": {
                    "filter": {
                        "term": {
                            "_type": "answers"
                        }
                    }
                }
            }
        }
        helpers.reindex(self.client, "test_index", "prod_index", query=answers_only)
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))
        self.assertFalse(
            self.client.indices.exists_type(index='prod_index',
                                            doc_type='questions'))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            50,
            self.client.count(index='prod_index', doc_type='answers')['count'])

        self.assertEqual(
            {"answer": 42, "correct": True},
            self.client.get(index="prod_index",
                            doc_type="answers",
                            id=42)['_source'])
Beispiel #6
0
	def reindex(self, idx, newindex, alias_name=None, remap_alias=None, **kwargs):
		"""Copy the documents behind ``idx`` into ``newindex``, optionally moving an alias.

		``idx`` may be an alias or a concrete index name. When ``newindex``
		does not exist yet it is created from the mapping looked up for
		``idx``. Extra keyword arguments are forwarded to ``helpers.reindex``.
		When ``remap_alias`` or ``alias_name`` is set, an alias is put on
		``newindex`` after the copy.
		"""
		# Work out whether idx is an alias or an actual index.
		alias_exists = bool(self._esc.exists_alias(idx))
		if alias_exists:
			source_index = self.get_index_for_alias(idx)
			alias = idx
		else:
			source_index = idx
			alias = None

		# An explicitly requested alias name takes precedence.
		if alias_name:
			alias = alias_name

		if not self._esc.exists(newindex):
			# The new index is created as a copy of the old mapping.
			# "idx" is used on purpose: models are defined against the alias,
			# and the returned dict is keyed by that name.
			mapping_by_name = self.get_index_map(index=idx)
			self._esc.create(index=newindex, body=mapping_by_name.get(idx))

		# Copy the documents.
		helpers.reindex(self._es, source_index, newindex, **kwargs)

		if not (remap_alias or alias_name):
			return

		if alias_exists and alias != alias_name:
			self._esc.delete_alias(alias)

		self._esc.put_alias(name=alias, index=newindex)
Beispiel #7
0
 def reindex_func(src_idx, dst_idx):
     """Reindex ``src_idx`` into ``dst_idx``, reporting failures on stdout.

     The client is built from ``self.es_clienthost``; ``self`` is not a
     parameter here, so it is presumably captured from an enclosing scope
     (TODO confirm against the surrounding code).
     """
     es = Elasticsearch(self.es_clienthost)
     try:
         helpers.reindex(es, src_idx, dst_idx)
     except Exception:
         # Best-effort: a failed reindex is reported, not raised. Catching
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         print('Reindex failed for src_idx ' + src_idx +
               ' into dst_idx ' + dst_idx)
Beispiel #8
0
    def test_all_documents_get_moved(self):
        """Index 100 documents, reindex them, and check the target index.

        Even ids are stored as "answers" and odd ids as "questions", so each
        type must count 50 documents in "prod_index" after the reindex.
        """
        bulk = []
        for x in range(100):
            doc_type = "answers" if x % 2 == 0 else "questions"
            # Each document contributes an action line followed by its source.
            bulk.append({
                "index": {
                    "_index": "test_index",
                    "_type": doc_type,
                    "_id": x
                }
            })
            bulk.append({"answer": x, "correct": x == 42})
        self.client.bulk(bulk, refresh=True)

        helpers.reindex(self.client, "test_index", "prod_index")
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            50,
            self.client.count(index='prod_index',
                              doc_type='questions')['count'])
        self.assertEqual(
            50,
            self.client.count(index='prod_index', doc_type='answers')['count'])

        self.assertEqual(
            {"answer": 42, "correct": True},
            self.client.get(index="prod_index",
                            doc_type="answers",
                            id=42)['_source'])
    def test_reindex_passes_kwargs_to_scan_and_bulk(self):
        """Check that ``scan_kwargs`` and ``bulk_kwargs`` are accepted by reindex.

        The scan is given ``q='type:answers'`` and the bulk call is given
        ``refresh=True``, so no explicit refresh precedes the assertions.
        """
        helpers.reindex(
            self.client,
            "test_index",
            "prod_index",
            scan_kwargs={'q': 'type:answers'},
            bulk_kwargs={'refresh': True},
        )

        self.assertTrue(self.client.indices.exists("prod_index"))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            50,
            self.client.count(index='prod_index', q='type:answers')['count'])

        self.assertEqual(
            {"answer": 42, "correct": True, "type": "answers"},
            self.client.get(index="prod_index", doc_type="post", id=42)['_source'])
    def test_children_are_reindexed_correctly(self):
        """Reindex 'test-index' into 'real-index' and check both documents.

        The question (id 42) is read from 'real-index'; the answer (id 47,
        routing 42) is read from 'test-index'.
        """
        helpers.reindex(self.client, 'test-index', 'real-index')

        question = self.client.get(index='real-index', doc_type='post', id=42)
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            {
                '_id': '42',
                '_index': 'real-index',
                '_source': {'question_answer': 'question'},
                '_type': 'post',
                '_version': 1,
                'found': True
            },
            question
        )

        answer = self.client.get(
            index='test-index',
            doc_type='post',
            id=47,
            routing=42
        )
        self.assertEqual(
            {
                '_routing': '42',
                '_id': '47',
                '_index': 'test-index',
                '_source': {'some': 'data', 'question_answer': {'name': 'answer', 'parent': 42}},
                '_type': 'post',
                '_version': 1,
                'found': True
            },
            answer
        )
Beispiel #11
0
def backup_index(es_source, es_target, source_index, target_index):
    """
    Copy ``source_index`` on ``es_source`` into ``target_index`` on ``es_target``.

    The target index is set up from the source's mappings and settings,
    both are verified, and the documents are then reindexed across clients.

    Warning
    ------------------------
    The existing target index is deleted.
    """
    # Drop any previous copy of the target.
    target_indices = es_target.indices
    if target_indices.exists(target_index):
        target_indices.delete(target_index)

    # Recreate the target from the source's mappings and settings.
    mapping_response = es_source.indices.get_mapping(index=source_index)
    mappings = mapping_response[source_index]['mappings']
    settings_response = es_source.indices.get_settings(index=source_index)
    settings = settings_response[source_index]['settings']
    setup_index_template(es_target, target_index, settings, mappings)

    mappings_ok = _mappings_checking(es_source, es_target, source_index,
                                     target_index)
    if not mappings_ok:
        raise Exception('mappings is not copied correctly')

    settings_ok = _settings_checking(es_source, es_target, source_index,
                                     target_index)
    if not settings_ok:
        raise Exception('settings is not copied correctly')

    # Copy the data; the source is refreshed first.
    es_source.indices.refresh(source_index)
    reindex(
        client=es_source,
        target_client=es_target,
        source_index=source_index,
        target_index=target_index,
        scroll=_SCROLL_TIME,
    )
Beispiel #12
0
def reindex(args):
    """Reindex the annotations into a new Elasticsearch index."""
    request = bootstrap(args)
    es = request.es

    # Prepare the target index before copying anything into it.
    search_config.configure_index(es, args.target)

    # Copy the annotations from the current index to the target.
    es_helpers.reindex(
        client=es.conn,
        source_index=es.index,
        target_index=args.target,
    )

    if not args.update_alias:
        return

    # First drop the alias from every index, then point it at the target.
    drop_existing = {"remove": {"index": "*", "alias": request.es.index}}
    point_to_target = {"add": {"index": args.target, "alias": request.es.index}}
    request.es.conn.indices.update_aliases(
        body={'actions': [drop_existing, point_to_target]})
Beispiel #13
0
 def run(self, index_name=None):
     """Rebuild an Elasticsearch index by cloning it and re-pointing an alias.

     A clone index named ``<index_name>-<random string>`` is created and
     mapped, the documents are reindexed into it, the original index is
     deleted, and an alias with the original name is put on the clone's
     real index. The temporary clone-name alias is removed at the end.

     :param index_name: index to rebuild; falls back to the configured
         ``ELASTICSEARCH_INDEX`` when empty.
     :raises Exception: if the index is not among the available indexes.
     """
     # if no index name is passed then use the configured one
     index_name = index_name if index_name else superdesk.app.config['ELASTICSEARCH_INDEX']
     print('Starting index rebuilding for index: {}'.format(index_name))
     if index_name not in self._get_available_indexes():
         raise Exception("Index {} is not configured".format(index_name))
     try:
         es = get_es(superdesk.app.config['ELASTICSEARCH_URL'])
         clone_name = index_name + '-' + get_random_string()
         print('Creating index: ', clone_name)
         superdesk.app.data.elastic.create_index(clone_name, superdesk.app.config['ELASTICSEARCH_SETTINGS'])
         real_name = superdesk.app.data.elastic.get_index_by_alias(clone_name)
         print('Putting mapping for index: ', clone_name)
         superdesk.app.data.elastic.put_mapping(superdesk.app, clone_name)
         print('Starting index rebuilding.')
         reindex(es, index_name, clone_name)
         print('Finished index rebuilding.')
         print('Deleting index: ', index_name)
         get_indices(es).delete(index_name)
         print('Creating alias: ', index_name)
         get_indices(es).put_alias(index=real_name, name=index_name)
         print('Alias created.')
         print('Deleting clone name alias')
         get_indices(es).delete_alias(name=clone_name, index=real_name)
         print('Deleted clone name alias')
     except elasticsearch.exceptions.NotFoundError as nfe:
         # The rebuild did not complete; report the error only.
         print(nfe)
     else:
         # Success is reported only when no step raised NotFoundError.
         print('Index {0} rebuilt successfully.'.format(index_name))
Beispiel #14
0
    def test_reindex_accepts_a_query(self):
        """Reindex with a bool/filter query that keeps only "answers" documents.

        Afterwards "prod_index" must exist and hold 50 documents matching
        ``type:answers``, among them document 42.
        """
        answers_only = {
            "query": {"bool": {"filter": {"term": {"type": "answers"}}}}
        }
        helpers.reindex(self.client, "test_index", "prod_index", query=answers_only)
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))

        answer_count = self.client.count(index="prod_index", q="type:answers")["count"]
        self.assertEqual(50, answer_count)

        document = self.client.get(index="prod_index", id=42)
        self.assertEqual(
            {"answer": 42, "correct": True, "type": "answers"},
            document["_source"],
        )
Beispiel #15
0
    def test_children_are_reindexed_correctly(self):
        """Reindex 'test-index' into 'real-index' and check both documents.

        The question (id 42) is read from 'real-index'; the answer (id 47,
        routing 42) is read from 'test-index'.
        """
        helpers.reindex(self.client, 'test-index', 'real-index')

        question = self.client.get(index='real-index', doc_type='post', id=42)
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            {
                '_id': '42',
                '_index': 'real-index',
                '_source': {
                    'question_answer': 'question'
                },
                '_type': 'post',
                '_version': 1,
                'found': True
            }, question)

        answer = self.client.get(index='test-index',
                                 doc_type='post',
                                 id=47,
                                 routing=42)
        self.assertEqual(
            {
                '_routing': '42',
                '_id': '47',
                '_index': 'test-index',
                '_source': {
                    'some': 'data',
                    'question_answer': {
                        'name': 'answer',
                        'parent': 42
                    }
                },
                '_type': 'post',
                '_version': 1,
                'found': True
            }, answer)
Beispiel #16
0
def clone_index(use_helper, from_index, to_index):
    """Clone an index.

    Copies ``from_index`` into the already existing ``to_index`` after the
    user confirms the overwrite.

    :param use_helper: when true, copy with ``elasticsearch.helpers.reindex``;
        otherwise call the client's ``reindex`` API with
        ``wait_for_completion=False``.
    :param from_index: name of the index to read from.
    :param to_index: name of the index to write to; it must already exist.
    :returns: ``1`` when ``to_index`` does not exist, otherwise ``None``.
    """
    from elasticsearch_dsl import Search
    from elasticsearch.helpers import reindex

    click.clear()

    if not es.client.indices.exists(index=to_index):
        # The original mixed a %-style placeholder with str.format, so the
        # index name was never interpolated into the message.
        click.secho('{} not existing!'.format(to_index), fg='red')
        return 1

    cnt = Search(using=es.client, index=to_index).count()
    message = 'Index %s already exists (%d records). Overwrite?' % (to_index,
                                                                    cnt)

    # abort=True makes click abort when the user declines.
    click.confirm(message, abort=True)

    if use_helper:
        reindex(client=es.client,
                source_index=from_index,
                target_index=to_index)
    else:
        es.client.reindex(body=dict(source=dict(index=from_index),
                                    dest=dict(index=to_index)),
                          wait_for_completion=False)
Beispiel #17
0
    def test_children_are_reindexed_correctly(self):
        """Reindex "test-index" into "real-index" and check both documents.

        The question (id 42) is read from "real-index"; the answer (id 47,
        routing 42) is read from "test-index".
        """
        helpers.reindex(self.client, "test-index", "real-index")

        expected_question = {
            "_id": "42",
            "_index": "real-index",
            "_primary_term": 1,
            "_seq_no": 0,
            "_source": {"question_answer": "question"},
            "_version": 1,
            "found": True,
        }
        question = self.client.get(index="real-index", id=42)
        self.assertEqual(expected_question, question)

        expected_answer = {
            "_routing": "42",
            "_id": "47",
            "_index": "test-index",
            "_primary_term": 1,
            "_seq_no": 1,
            "_source": {
                "some": "data",
                "question_answer": {"name": "answer", "parent": 42},
            },
            "_version": 1,
            "found": True,
        }
        answer = self.client.get(index="test-index", id=47, routing=42)
        self.assertEqual(expected_answer, answer)
    def test_children_are_reindexed_correctly(self):
        """Reindex "test-index" into "real-index" and check both documents.

        The question (id 42) is read from "real-index"; the answer (id 47,
        routing 42) is read from "test-index".
        """
        helpers.reindex(self.client, "test-index", "real-index")

        question = self.client.get(index="real-index", id=42)
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            {
                "_id": "42",
                "_index": "real-index",
                "_primary_term": 1,
                "_seq_no": 0,
                "_source": {"question_answer": "question"},
                "_type": "_doc",
                "_version": 1,
                "found": True,
            },
            question,
        )

        answer = self.client.get(index="test-index", id=47, routing=42)
        self.assertEqual(
            {
                "_routing": "42",
                "_id": "47",
                "_index": "test-index",
                "_primary_term": 1,
                "_seq_no": 1,
                "_source": {
                    "some": "data",
                    "question_answer": {"name": "answer", "parent": 42},
                },
                "_type": "_doc",
                "_version": 1,
                "found": True,
            },
            answer,
        )
    def test_reindex_passes_kwargs_to_scan_and_bulk(self):
        """Check that ``scan_kwargs`` and ``bulk_kwargs`` are accepted by reindex.

        The scan is given ``doc_type='answers'`` and the bulk call is given
        ``refresh=True``, so no explicit refresh precedes the assertions.
        """
        helpers.reindex(
            self.client,
            "test_index",
            "prod_index",
            scan_kwargs={'doc_type': 'answers'},
            bulk_kwargs={'refresh': True},
        )

        self.assertTrue(self.client.indices.exists("prod_index"))
        self.assertFalse(
            self.client.indices.exists_type(index='prod_index', doc_type='questions'))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            50,
            self.client.count(index='prod_index', doc_type='answers')['count'])

        self.assertEqual(
            {"answer": 42, "correct": True},
            self.client.get(index="prod_index", doc_type="answers", id=42)['_source'])
def reindex_data(data):
    """Reindex between the indices named in a JSON request string.

    :param data: JSON text; its "source" and "target" keys name the indices.
    :returns: ``{"data": "sucess"}`` when the reindex completes, otherwise a
        dict describing the Elasticsearch exception that was caught.
    """
    try:
        request_data = json.loads(data)
        es = Elasticsearch()
        helpers.reindex(client=es, source_index=request_data.get("source"), target_index=request_data.get("target"))
        return {"data":"sucess"}
    except elasticsearch.ElasticsearchException as e:
        # "except X as e" parses on Python 2.6+ and Python 3, unlike the old
        # comma form. Not every exception object carries "message" or
        # "errors", so both are read defensively instead of raising
        # AttributeError inside the handler.
        return {
            "new": dir(e),
            "mes": getattr(e, "message", str(e)),
            "error": getattr(e, "errors", None),
        }
    def test_reindex_accepts_a_query(self):
        """Reindex with a bool/filter query that keeps only "answers" documents.

        Afterwards "prod_index" must exist and hold 50 documents matching
        ``type:answers``, among them document 42.
        """
        answers_only = {"query": {"bool": {"filter": {"term": {"type": "answers"}}}}}
        helpers.reindex(self.client, "test_index", "prod_index", query=answers_only)
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            50,
            self.client.count(index='prod_index', q='type:answers')['count'])

        self.assertEqual(
            {"answer": 42, "correct": True, "type": "answers"},
            self.client.get(index="prod_index", doc_type="post", id=42)['_source'])
Beispiel #22
0
 def history(self):
     """Copy the documents matching ``self.query_body`` into the history index.

     Documents are read from ``self.active_index`` and written to
     ``self.history_index`` in chunks of 1000; a debug line is logged
     before and after the copy.
     """
     started_at = datetime.datetime.now()
     Logger.debug("{},begin history!".format(started_at))

     helpers.reindex(
         self.es,
         self.active_index,
         self.history_index,
         query=self.query_body,
         chunk_size=1000,
     )

     finished_at = datetime.datetime.now()
     Logger.debug("{},history finished!".format(finished_at))
def reindex_data(data):
    """Reindex between the indices named in a JSON request string.

    :param data: JSON text; its "source" and "target" keys name the indices.
    :returns: ``{"data": "sucess"}`` when the reindex completes, otherwise a
        dict describing the Elasticsearch exception that was caught.
    """
    try:
        request_data = json.loads(data)
        es = Elasticsearch()
        helpers.reindex(client=es, source_index=request_data.get("source"), target_index=request_data.get("target"))
        return {"data":"sucess"}
    except elasticsearch.ElasticsearchException as e:
        # "except X as e" parses on Python 2.6+ and Python 3, unlike the old
        # comma form. Not every exception object carries "message" or
        # "errors", so both are read defensively instead of raising
        # AttributeError inside the handler.
        return {
            "new": dir(e),
            "mes": getattr(e, "message", str(e)),
            "error": getattr(e, "errors", None),
        }
    def test_all_documents_get_moved(self):
        """Reindex "test_index" into "prod_index" and check the copied documents.

        The target must exist, count 50 "questions" and 50 "answers", and
        contain answer 42 with the expected source.
        """
        helpers.reindex(self.client, "test_index", "prod_index")
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            50,
            self.client.count(index='prod_index', doc_type='questions')['count'])
        self.assertEqual(
            50,
            self.client.count(index='prod_index', doc_type='answers')['count'])

        self.assertEqual(
            {"answer": 42, "correct": True},
            self.client.get(index="prod_index", doc_type="answers", id=42)['_source'])
Beispiel #25
0
def reindex(src, dest):
    '''Reindex documents from index {src} to index {dest}, then delete {src}.

    The client comes from a freshly set-up scrapi ``DatabaseManager``.
    '''
    from elasticsearch import helpers as es_helpers
    from scrapi.processing.elasticsearch import DatabaseManager

    manager = DatabaseManager()
    manager.setup()
    client = manager.es

    es_helpers.reindex(client, src, dest)
    # The source index is removed once the copy has returned.
    client.indices.delete(src)
Beispiel #26
0
    def execute(self):
        """
        Copy the documents of ``self.index`` into ``self.target``.

        A new client is created from ``settings.ELASTIC_SEARCH_HOSTS`` for
        the operation.
        """
        es_client = elasticsearch.Elasticsearch(hosts=settings.ELASTIC_SEARCH_HOSTS)

        source, destination = self.index, self.target
        log.debug('Backup index %s to %s ', source, destination)
        helpers.reindex(es_client, source, destination)
Beispiel #27
0
def reindex():
    """Copy every document from the source index to the destination index.

    Documents are read through ``es_src`` and written through ``es_des``;
    a confirmation line is printed when the helper returns.
    """
    # A match_all query selects every document in the source index.
    match_everything = {"query": {"match_all": {}}}

    helpers.reindex(
        es_src,
        src_index_name,
        des_index_name,
        target_client=es_des,
        query=match_everything,
    )

    print('Success reindexing')
    def test_all_documents_get_moved(self):
        """Reindex "test_index" into "prod_index" and check the copied documents.

        The target must exist, count 50 documents for each of
        ``type:questions`` and ``type:answers``, and contain document 42
        with the expected source.
        """
        helpers.reindex(self.client, "test_index", "prod_index")
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            50,
            self.client.count(index='prod_index', q='type:questions')['count'])
        self.assertEqual(
            50,
            self.client.count(index='prod_index', q='type:answers')['count'])

        self.assertEqual(
            {"answer": 42, "correct": True, "type": "answers"},
            self.client.get(index="prod_index", doc_type="post", id=42)['_source'])
Beispiel #29
0
def reindex(src, dest):
    '''Move the documents of index {src} into index {dest}.

    After the copy the {src} index is deleted. The client is obtained from
    a scrapi ``DatabaseManager`` that is set up on every call.
    '''
    from elasticsearch import helpers as es_helpers
    from scrapi.processing.elasticsearch import DatabaseManager

    db = DatabaseManager()
    db.setup()
    es_client = db.es

    es_helpers.reindex(es_client, src, dest)
    es_client.indices.delete(src)
Beispiel #30
0
 def reindex(self, source_index, target_index=None):
     """Reindex ``source_index`` into ``target_index``.

     When no target is given the source name is used for both. Any failure
     is logged with its traceback and re-raised as ``GeneralException``.
     """
     target_index = target_index or source_index
     try:
         helpers.reindex(es, source_index, target_index)
     except Exception as exc:
         details = (
             f'ERROR WHILE REINDEXING SOURCE: {source_index} TARGET: {target_index}'
             + traceback.format_exc()
         )
         self.logger.error(details)
         raise GeneralException(message=str(exc))
Beispiel #31
0
    def execute(self):
        """
        Copy the documents of ``self.index`` into ``self.target``.

        A new client is created from ``settings.ELASTIC_SEARCH_HOSTS`` with
        ``retry_on_timeout`` enabled.
        """
        client_options = {
            'hosts': settings.ELASTIC_SEARCH_HOSTS,
            'retry_on_timeout': True,
        }
        es_client = elasticsearch.Elasticsearch(**client_options)

        source, destination = self.index, self.target
        log.debug('Backup index %s to %s ', source, destination)
        helpers.reindex(es_client, source, destination)
def copy_es_index(index_source, index_destination):
    """Replace ``index_destination`` with a copy of ``index_source``.

    The destination is deleted (a 404 is ignored), created again through
    ``maybe_create_index``, and filled in chunks of 500 documents.
    """
    client = Elasticsearch()

    # Start from an empty destination index.
    client.indices.delete(index=index_destination, ignore=[404])
    maybe_create_index(index_destination)

    reindex(
        client,
        source_index=index_source,
        target_index=index_destination,
        chunk_size=500,
    )
 def run():
     """Move documents and the alias from ``old_name`` to ``new_name``.

     ``self``, ``elasticsearch``, ``old_name`` and ``new_name`` are not
     parameters; they are presumably provided by an enclosing scope
     (TODO confirm against the surrounding code).
     """
     # Copy the documents from the old index onto the new index.
     helpers.reindex(elasticsearch, old_name, new_name)

     # Point the alias at the new index, then detach it from the old one.
     self.indices.put_alias(index=new_name, name=self.alias_name)
     self.indices.delete_alias(index=old_name, name=self.alias_name)

     # Empty the old index but keep its mapping, in case a rollback to the
     # older schema is needed.
     everything = {"query": {"match_all": {}}}
     self.delete_by_query(index=old_name, body=everything)
Beispiel #34
0
    def clone_index(self, new_indexname, index_conf=None):
        '''Create `new_indexname` and copy the current index into it.

           Every entry of `self.index_name` is reindexed into the newly
           created index.

           :param new_indexname: Name of the index to create and fill.
           :param index_conf: Configuration for the new index; it is handed
                              unchanged to :py:func:`DB.create_index`
        '''
        source_name = self.index_name
        log.debug("Cloning index '{}' into '{}'".format(source_name, new_indexname))

        self.create_index(indexname=new_indexname, index_conf=index_conf)
        reindex(self.es, self.index_name, new_indexname)
Beispiel #35
0
 def run():
     """Move documents and the alias from ``old_name`` to ``new_name``.

     ``self``, ``elasticsearch``, ``old_name`` and ``new_name`` are not
     parameters; they are presumably provided by an enclosing scope
     (TODO confirm against the surrounding code).
     """
     # Copy the documents from the old index onto the new index.
     helpers.reindex(elasticsearch, old_name, new_name)

     # Point the alias at the new index, then detach it from the old one.
     self.indices.put_alias(index=new_name, name=self.alias_name)
     self.indices.delete_alias(index=old_name, name=self.alias_name)

     # Empty the old index but keep its mapping, in case a rollback to the
     # older schema is needed.
     match_all_body = {"query": {"match_all": {}}}
     self.delete_by_query(index=old_name, body=match_all_body)
Beispiel #36
0
def migrate():
    """Create a new index, copy existing data into it and switch the alias.

    Data is only reindexed when ``ELASTICSEARCH_ALIAS`` already exists.
    """
    print("Starting migration...")

    elastic = connect_to_elasticsearch()
    index_name = create_new_index(elastic)

    previous_data_exists = elastic.indices.exists(ELASTICSEARCH_ALIAS)
    if previous_data_exists:
        print("Reindexing data from previous index...")
        reindex(elastic, ELASTICSEARCH_ALIAS, index_name)

    switch_alias_to_index(elastic, ELASTICSEARCH_ALIAS, index_name)

    print("Done.")
Beispiel #37
0
def migrate():
    """Create a new index, copy existing data into it and switch the alias.

    Data is only reindexed when ``ELASTICSEARCH_ALIAS`` already exists.
    """
    print("Starting migration...")

    elastic = connect_to_elasticsearch()
    index_name = create_new_index(elastic)

    has_previous_index = elastic.indices.exists(ELASTICSEARCH_ALIAS)
    if has_previous_index:
        print("Reindexing data from previous index...")
        reindex(elastic, ELASTICSEARCH_ALIAS, index_name)

    switch_alias_to_index(elastic, ELASTICSEARCH_ALIAS, index_name)

    print("Done.")
Beispiel #38
0
def reindex():
    """Reindex the index named by the parsed ``reindex`` argument.

    The destination is ``new_index_name`` when that argument was given,
    otherwise the source name with "-reindex" appended.
    """
    args = parse_args()

    src_index_name = args.reindex

    des_index_name = args.new_index_name
    if des_index_name is None:
        des_index_name = src_index_name + "-reindex"

    helpers.reindex(es(), src_index_name, des_index_name)
    print(src_index_name + " has been reindexed to " + des_index_name)
Beispiel #39
0
def doreindex(srcidxlist, dstidx):
    """Reindex every index named in ``srcidxlist`` into ``dstidx``.

    A ``BulkIndexError`` for one source index is reported on stdout and the
    loop continues with the next index. Progress and a final verification
    hint are printed as well.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("reindexing to " + dstidx)
    client = Elasticsearch([{'host':'localhost','port':9200}], sniff_on_start=True, sniff_on_connection_fail=True)
    for row in srcidxlist:
        print("Reindexing index " + row + " to " + dstidx + " please stand by and watch for errors")
        try:
            reindex(client, row, dstidx, scroll='5m')
        except BulkIndexError:
            print("Problems with one or more documents from the source index above due to incompatable data types. They will not be included in the destination index")
            # sys.exc_clear() exists only on Python 2; skip it elsewhere.
            if hasattr(sys, 'exc_clear'):
                sys.exc_clear()

    print("Confirm the documents were all reindexed\nrun curl -GET http://localhost:9200/_cat/indices?v | grep " + dstidx)
Beispiel #40
0
    def clone_index(self, new_indexname, index_conf=None):
        '''Create `new_indexname` and copy the current index into it.

           Every entry of `self.index_name` is reindexed into the newly
           created index.

           :param new_indexname: Name of the index to create and fill.
           :param index_conf: Configuration for the new index; it is handed
                              unchanged to :py:func:`DB.create_index`
        '''
        current_name = self.index_name
        message = "Cloning index '{}' into '{}'".format(current_name, new_indexname)
        log.debug(message)

        self.create_index(indexname=new_indexname, index_conf=index_conf)
        reindex(self.es, self.index_name, new_indexname)
    def test_all_documents_get_moved(self):
        """Index 100 documents, reindex them, and check the target index.

        Even ids are stored as "answers" and odd ids as "questions"; after
        the reindex both types must exist in "prod_index" and the total
        count must be 100.
        """
        bulk = []
        for x in range(100):
            doc_type = "answers" if x % 2 == 0 else "questions"
            # Each document contributes an action line followed by its source.
            bulk.append({"index": {"_index": "test_index", "_type": doc_type, "_id": x}})
            bulk.append({"answer": x, "correct": x == 42})
        self.client.bulk(bulk, refresh=True)

        helpers.reindex(self.client, "test_index", "prod_index")
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))
        self.assertTrue(self.client.indices.exists_type("prod_index", "answers"))
        self.assertTrue(self.client.indices.exists_type("prod_index", "questions"))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(100, self.client.count(index='prod_index')['count'])
Beispiel #42
0
def migrate_indexes(aggregate_indexes=None, forensic_indexes=None):
    """
    Updates index mappings

    For every existing aggregate index whose ``published_policy.fo`` field is
    mapped as ``long``, a ``<name>-v2`` index is created with a text/keyword
    mapping for that field, the documents are reindexed into it, and the
    original index is deleted. Forensic indexes are iterated but not changed.

    Args:
        aggregate_indexes (list): A list of aggregate index names
        forensic_indexes (list): A list of forensic index names
    """
    version = 2
    doc = "doc"
    fo_field = "published_policy.fo"
    fo = "fo"

    aggregate_indexes = [] if aggregate_indexes is None else aggregate_indexes
    forensic_indexes = [] if forensic_indexes is None else forensic_indexes

    for aggregate_index_name in aggregate_indexes:
        aggregate_index = Index(aggregate_index_name)
        if not aggregate_index.exists():
            continue

        # The response is keyed by index name; only the first entry is read.
        response = aggregate_index.get_field_mapping(fields=[fo_field])
        first_key = list(response.keys())[0]
        type_mappings = response[first_key]["mappings"]
        if doc not in type_mappings:
            continue

        fo_type = type_mappings[doc][fo_field]["mapping"][fo]["type"]
        if fo_type != "long":
            continue

        new_index_name = "{0}-v{1}".format(aggregate_index_name, version)
        body = {
            "properties": {
                "published_policy.fo": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                }
            }
        }
        new_index = Index(new_index_name)
        new_index.create()
        new_index.put_mapping(doc_type=doc, body=body)
        reindex(connections.get_connection(), aggregate_index_name,
                new_index_name)
        aggregate_index.delete()

    # Forensic indexes currently need no migration; the loop is a no-op.
    for _forensic_index in forensic_indexes:
        pass
Beispiel #43
0
def main():
    """Print document counts for source and target around an optional reindex.

    The reindex itself only runs when the parsed ``apply`` argument is
    truthy; the counts are printed before and after in either case.
    """
    args = parse_args()
    should_apply = args.apply
    print(should_apply)

    es = Elasticsearch([{'host': host}])

    for label, index_name in (("Source [before]", source),
                              ("Target [before]", target)):
        print_count(label, es.count(index=index_name))

    if args.apply:
        reindex(es, source, target, chunk_size=5000, scroll='30m')

    for label, index_name in (("Source [after]", source),
                              ("Target [after]", target)):
        print_count(label, es.count(index=index_name))
Beispiel #44
0
def migrate_index(conn, index_name, current_version=INDEX_VERSION):
    """Move the contents of ``index_name`` into a newly created index.

    Steps, in order: create the new index with ``create_index`` (minor
    version 0, no read alias), copy the documents with ``reindex``, switch
    aliases with ``swap_aliases`` and remove the old index with
    ``delete_index``.

    Returns the object that ``create_index`` returned for the new index.
    """
    replacement = create_index(
        conn, version=current_version, minor_version=0, setup_read_alias=False
    )
    replacement_name = replacement._name

    reindex(conn, source_index=index_name, target_index=replacement_name)
    swap_aliases(conn, old_index_name=index_name, new_index_name=replacement_name)
    delete_index(conn, index_name)

    return replacement
    def run_request(self, index=None):
        """Reindex ``index`` onto itself once the user has confirmed.

        When no index is given, the index-list panel is shown (passing
        ``self.run`` to it) and ``None`` is returned.  ``None`` is also
        returned when the confirmation dialog is cancelled.  Otherwise the
        reindex runs and a dict describing the outcome is returned: status
        "ERROR" with the reported errors, or status "SUCCESS" with the
        reported success count.
        """
        if not index:
            self.show_index_list_panel(self.run)
            return

        confirmed = sublime.ok_cancel_dialog(
            "Are you sure you want to reindex?", ok_title='Reindex')
        if not confirmed:
            return

        sublime.status_message("Reindex: start ... please waite.")

        bulk_options = {
            "index": index,
            "stats_only": False,
            "expand_action_callback": expand_action,
        }
        # Source and target are the same index on the same client.
        success, errors = reindex(
            client=self.client,
            source_index=index,
            target_index=index,
            target_client=self.client,
            chunk_size=self.settings.chunk_size,
            scroll=self.settings.scroll_size,
            scan_kwargs={},
            bulk_kwargs=bulk_options,
        )

        result = {"command": self.command_name, "index": index}
        if errors:
            result["status"] = "ERROR"
            result["errors"] = errors
        else:
            result["status"] = "SUCCESS"
            result["docs"] = success
        return result
Beispiel #46
0
def reindex():
    """Reindex the index named by ``args.reindex`` into a destination index.

    The destination is ``args.new_index_name`` when that is not ``None``;
    otherwise it is the source name with "-reindex" appended.
    """
    options = parse_args()

    origin = options.reindex
    destination = (
        options.new_index_name
        if options.new_index_name is not None
        else origin + "-reindex"
    )

    # Positional arguments: client, source, target, query, target client.
    helpers.reindex(esSrc(), origin, destination, None, esDest())
    print(origin + " has been reindexed to " + destination)
Beispiel #47
0
    def test_all_documents_get_moved(self):
        """Reindexing "test_index" must populate "prod_index" completely."""
        es = self.client
        helpers.reindex(es, "test_index", "prod_index")
        es.indices.refresh()

        self.assertTrue(es.indices.exists("prod_index"))

        # 50 documents are expected for each of the two type queries.
        for type_query in ("type:questions", "type:answers"):
            matched = es.count(index="prod_index", q=type_query)
            self.assertEqual(50, matched["count"])

        moved = es.get(index="prod_index", id=42)
        self.assertEqual(
            {"answer": 42, "correct": True, "type": "answers"}, moved["_source"]
        )
    def test_all_documents_get_moved(self):
        """Reindexing "test_index" must copy every document to "prod_index".

        Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
        removed from ``unittest`` in Python 3.12.
        """
        helpers.reindex(self.client, "test_index", "prod_index")
        # Refresh so the copied documents are visible to count/get below.
        self.client.indices.refresh()

        self.assertTrue(self.client.indices.exists("prod_index"))
        self.assertEqual(
            50, self.client.count(index="prod_index", q="type:questions")["count"]
        )
        self.assertEqual(
            50, self.client.count(index="prod_index", q="type:answers")["count"]
        )

        self.assertEqual(
            {"answer": 42, "correct": True, "type": "answers"},
            self.client.get(index="prod_index", id=42)["_source"],
        )
Beispiel #49
0
def reindex():
    """Copy "tests_index" into "tweets_index", creating the latter if absent.

    Prints the index-creation response (when an index was created) and the
    result returned by ``helpers.reindex``.
    """
    target = "tweets_index"

    # Create the destination index first when it does not exist yet.
    if not es.indices.exists(target):
        created = es.indices.create(index=target, ignore=400, body=settings)
        print("index created: ", created)

    outcome = helpers.reindex(es, "tests_index", target)
    print(outcome)
    def test_reindex_passes_kwargs_to_scan_and_bulk(self):
        """``scan_kwargs`` / ``bulk_kwargs`` must reach the scan and bulk steps.

        The scan is restricted to ``type:answers`` and the bulk step is asked
        to refresh, so no explicit index refresh is issued before asserting.

        Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
        removed from ``unittest`` in Python 3.12.
        """
        helpers.reindex(
            self.client,
            "test_index",
            "prod_index",
            scan_kwargs={"q": "type:answers"},
            bulk_kwargs={"refresh": True},
        )

        self.assertTrue(self.client.indices.exists("prod_index"))
        self.assertEqual(
            50, self.client.count(index="prod_index", q="type:answers")["count"]
        )

        self.assertEqual(
            {"answer": 42, "correct": True, "type": "answers"},
            self.client.get(index="prod_index", id=42)["_source"],
        )
Beispiel #51
0
def cp_index(src_client=None, src_index=None, target_client=None, target_index=None, chunk_size=1000):
    """Reindex ``src_index`` from ``src_client`` into the target index.

    ``target_client`` and ``target_index`` fall back to the source client and
    source index when they are falsy.  Metadata is copied first with
    ``cp_metadata``; documents are copied only when that call returns a
    truthy value.

    :param src_client: client to read from; checked with ``check_not_empty``.
    :param src_index: index to read from; checked with ``check_not_empty``.
    :param target_client: client to write to (defaults to ``src_client``).
    :param target_index: index to write to (defaults to ``src_index``).
    :param chunk_size: forwarded to ``reindex`` as ``chunk_size``.
    """
    check_not_empty(src_client)
    check_not_empty(src_index)
    target_client = target_client or src_client
    target_index = target_index or src_index
    ok = cp_metadata(src_client, src_index, target_client, target_index)
    if ok:
        # Single-argument print(...) behaves the same on Python 2 and 3,
        # unlike the print statement, which is a SyntaxError on Python 3.
        print("Copy documents...")
        reindex(
            client=src_client,
            source_index=src_index,
            target_client=target_client,
            target_index=target_index,
            chunk_size=chunk_size,
            query={"query": {"match_all": {}}}
        )
        print("Data copied!")
    def reindex(self, old_index, new_index):
        """Copy the documents of ``old_index`` into ``new_index``.

        ``new_index`` is created with ``self.get_index_config()`` as its body
        when it does not exist yet; an already existing index is reused and a
        notice is printed instead.

        Raises ``ValueError`` when ``old_index`` does not exist.
        """
        conn = self.conn
        indices = conn.indices

        if not indices.exists(old_index):
            raise ValueError("Index {0} does not exist!".format(old_index))

        if not indices.exists(new_index):
            # Create the target with the current index configuration.
            indices.create(new_index, body=self.get_index_config())
        else:
            self._print("Index {0} already exists. "
                        "The mapping will not be changed.".format(new_index))

        self._print("Reindexing {0} to {1}...".format(old_index, new_index))
        helpers.reindex(conn, old_index, new_index)
        self._print("Reindexing done.")
Beispiel #53
0
def reindex(args):
    """Reindex the annotations into the index named by ``args.target``.

    The target index is configured first, then the documents of the current
    index (``request.es.index``) are copied into it.  When
    ``args.update_alias`` is truthy, the alias carrying the current index
    name is removed from all indices and added to the target.
    """
    request = bootstrap(args)
    es = request.es
    target = args.target

    # Configure the new index before any documents are copied into it.
    search_config.configure_index(es, target)

    es_helpers.reindex(client=es.conn, source_index=es.index, target_index=target)

    if not args.update_alias:
        return

    alias_name = es.index
    actions = [
        # Remove all existing aliases
        {"remove": {"index": "*", "alias": alias_name}},
        # Alias current index name to new target
        {"add": {"index": target, "alias": alias_name}},
    ]
    es.conn.indices.update_aliases(body={'actions': actions})
    def test_children_are_reindexed_correctly(self):
        """Check a parent and a child document after reindexing.

        The parent (question 42) is fetched from 'real-index'; the child
        (answer 47, parent 42) is fetched from 'test-index'.

        Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
        removed from ``unittest`` in Python 3.12.
        """
        helpers.reindex(self.client, 'test-index', 'real-index')

        q = self.client.get(
            index='real-index',
            doc_type='question',
            id=42,
            fields=['_source']
        )
        # Merge any values nested under 'fields' into the top-level response
        # so the comparison below sees a flat dict.
        if 'fields' in q:
            q.update(q.pop('fields'))
        self.assertEqual(
            {
                '_id': '42',
                '_index': 'real-index',
                '_source': {},
                '_type': 'question',
                '_version': 1,
                'found': True
            }, q
        )
        q = self.client.get(
            index='test-index',
            doc_type='answer',
            id=47,
            parent=42,
            fields=['_source', '_parent']
        )
        if 'fields' in q:
            q.update(q.pop('fields'))
        # '_routing' is only present in some responses; when it is, it must
        # equal the parent id and is removed before the exact comparison.
        if '_routing' in q:
            self.assertEqual(q.pop('_routing'), '42')
        self.assertEqual(
            {
                '_id': '47',
                '_index': 'test-index',
                '_source': {'some': 'data'},
                '_type': 'answer',
                '_version': 1,
                '_parent': '42',
                'found': True
            }, q
        )
Beispiel #55
0
def set_up_index(idx):
    """Create the next versioned index for ``idx`` and return its name.

    ``es.indices.get_aliases(index=idx)`` is inspected:

    * If the response is empty, or ``idx`` itself is one of its keys (``idx``
      is still a real index, i.e. the first migration), ``<idx>_v1`` is
      created, the documents of ``idx`` are reindexed into it, ``idx`` is
      deleted and ``put_alias`` is called.
    * Otherwise the version number is parsed from the first key of the
      response, incremented, and ``<idx>_v<n>`` is created.  No documents are
      copied in this branch.

    :param idx: base index / alias name.
    :returns: name of the newly created versioned index.
    """
    alias = es.indices.get_aliases(index=idx)

    if not alias or idx in alias:
        # Deal with empty indices or the first migration
        index = '{}_v1'.format(idx)
        search.create_index(index=index)
        logger.info("Reindexing {0} to {1}_v1".format(idx, idx))
        helpers.reindex(es, idx, index)
        logger.info("Deleting {} index".format(idx))
        es.indices.delete(index=idx)
        # NOTE(review): arguments are positional here; confirm that they map
        # to the intended (index, name) pair for the installed client version.
        es.indices.put_alias(idx, index)
    else:
        # Increment version.  ``next(iter(...))`` works on Python 2 and 3,
        # whereas ``alias.keys()[0]`` raises TypeError on Python 3.  Splitting
        # from the right keeps base names that contain "_v" parseable.
        current_index = next(iter(alias))
        version = int(current_index.rsplit('_v', 1)[1]) + 1
        logger.info("Incrementing index version to {}".format(version))
        index = '{0}_v{1}'.format(idx, version)
        search.create_index(index=index)
        logger.info("{} index created".format(index))
    return index
    def _setup_alias(self, mappings=None, settings=None):
        """since we only want a single index to be aliased at a time,
        this method will check for the alias and delete the current alias if
        it is different from the index_name.
        """
        index = self.get_index_name()
        self.elasticsearch.indices.create(
            index="", body={"settings": settings, "mappings": mappings})

        if self.alias_exists:
            existing_index = self.index_name(self.current_revision_number)
            new_index = self.index_name(self.current_revision_number + 1)
            # reindex from the existing aliased index onto the new index
            reindex(self.elasticsearch, existing_index, new_index,
                chunk_size=self._chunk_size)
            self.elasticsearch.indices.delete_alias(index=existing_index,
                name=self.alias_name)

        self.elasticsearch.indices.put_alias(
            index=self.index_name, name=self.alias_name)
Beispiel #57
0
    def update(self, doc_type):
        """Create the next versioned index for ``doc_type`` and move the alias.

        The alias is ``frisc_<doc_type>``.  When it does not exist yet,
        ``self.create(doc_type)`` is called and nothing else happens.
        Otherwise the highest ``<alias>_v<N>`` index behind the alias is
        located, ``<alias>_v<N+1>`` is created, the documents are reindexed
        into it and the alias is switched from the old index to the new one.

        :param doc_type: document type used to derive alias and index names.
        :raises RuntimeError: if the next versioned index already exists.

        ``TransportError`` raised by the client is caught and reported on
        stdout.
        """
        alias_name = 'frisc_{}'.format(doc_type)

        try:
            if not self.esc.exists_alias(alias_name):
                self.create(doc_type)
                return

            version_number = 0
            old_index_name = ''

            # Raw string avoids the invalid "\d" escape; the alias name is
            # escaped so that it is always matched literally.
            versioned = re.compile(
                r'^({})_v(\d+)$'.format(re.escape(alias_name)))

            old_indexes = self.esc.get_alias(name=alias_name)
            for index in old_indexes:
                match = versioned.search(index)
                if match:
                    version = int(match.group(2))
                    if version > version_number:
                        version_number = version
                        old_index_name = match.group(0)

            version_number += 1
            index_name = '{}_v{}'.format(alias_name, version_number)

            if self.esc.exists(index_name):
                # A bare ``raise`` with no active exception only yields an
                # uninformative RuntimeError on Python 3; raise it explicitly
                # with a useful message instead.
                raise RuntimeError(
                    "Index {} already exists".format(index_name))

            self.__create__(index_name, type=doc_type)

            reindex(self.es, old_index_name, index_name)

            self.esc.update_aliases(
                body={'actions': [
                    {'remove': {'alias': alias_name, 'index': old_index_name}},
                    {'add': {'alias': alias_name, 'index': index_name}}
                ]}
            )

        except es_exceptions.TransportError:
            print("unable to connect to Elasticsearch")
 def run(self):
     """Rebuild the configured Elasticsearch index through a clone.

     A clone named ``<index>-<random string>`` is created and mapped, the
     documents of the configured index are reindexed into it, the original
     index is deleted and an alias carrying the original name is placed on
     the clone.

     A ``NotFoundError`` raised along the way is printed.  The success
     message is printed only when every step completed; previously it was
     printed even after such an error.
     """
     index_name = superdesk.app.config['ELASTICSEARCH_INDEX']
     print('Starting index rebuilding for index: ', index_name)
     try:
         es = get_es(superdesk.app.config['ELASTICSEARCH_URL'])
         clone_name = index_name + '-' + get_random_string()
         print('Creating index: ', clone_name)
         superdesk.app.data.elastic.create_index(clone_name, superdesk.app.config['ELASTICSEARCH_SETTINGS'])
         print('Putting mapping for index: ', clone_name)
         superdesk.app.data.elastic.put_mapping(superdesk.app, clone_name)
         print('Starting index rebuilding.')
         reindex(es, index_name, clone_name)
         print('Finished index rebuilding.')
         print('Deleting index: ', index_name)
         get_indices(es).delete(index_name)
         print('Creating alias: ', index_name)
         get_indices(es).put_alias(index=clone_name, name=index_name)
         print('Alias created.')
     except elasticsearch.exceptions.NotFoundError as nfe:
         print(nfe)
     else:
         # Only reached when no exception was raised in the try block.
         print('Index {0} rebuilt successfully.'.format(index_name))
def main(settings):
    """Copy documents between indices with ``helpers.reindex``.

    Documents of type ``settings.backuper.doc_type`` are read from
    ``settings.backuper.from_index`` and written to
    ``settings.backuper.index``.  The metadata property names listed in
    ``props`` (minus ``settings.backuper.props_to_exclude``, when that is
    not ``None``) are handed to ``make_expand`` to build the bulk
    ``expand_action_callback``.

    Progress is written to stdout when ``settings.verbosity`` is truthy.
    """
    props = ('_index', '_parent', '_percolate', '_routing', '_timestamp',
             '_ttl', '_type', '_version', '_version_type', '_id', '_retry_on_conflict')
    backuper = settings.backuper
    if backuper.props_to_exclude is not None:
        # Spaces only: the original line mixed a tab into its indentation.
        props = tuple(p for p in props if p not in backuper.props_to_exclude)

    es = Elasticsearch(settings.hosts)
    if settings.verbosity:
        sys.stdout.write('copying from {} to {}, documents {}\n'.format(
            backuper.from_index, backuper.index, backuper.doc_type))

    success, failed = helpers.reindex(
        es,
        backuper.from_index,
        backuper.index,
        scan_kwargs={'doc_type': backuper.doc_type},
        bulk_kwargs={'expand_action_callback': make_expand(props)},
    )
    if settings.verbosity:
        sys.stdout.write('Documents copied: {}. Documents Failed {}\n'.format(success, failed))