import uuid

from elasticsearch.exceptions import NotFoundError
from elasticsearch_dsl import Search
from psycopg2.sql import SQL

# `log`, `get_last_item_ids`, `create_mapping`, `database_connect`, and
# `schedule_distributed_index` are helpers defined elsewhere in this module.


def _index_table(self, table, dest_idx=None):
    """
    Check that the database table is in sync with Elasticsearch. If not,
    begin replication.
    """
    last_added_pg_id, _ = get_last_item_ids(table)
    if not last_added_pg_id:
        log.warning('Tried to sync ' + table + ' but it was empty.')
        return

    # Find the id of the last document inserted into Elasticsearch.
    destination = dest_idx if dest_idx else table
    s = Search(using=self.es, index=destination)
    s.aggs.bucket('highest_pg_id', 'max', field='id')
    try:
        es_res = s.execute()
        last_added_es_id = \
            int(es_res.aggregations['highest_pg_id']['value'])
    except (TypeError, NotFoundError):
        log.info('No matching documents found in Elasticsearch. '
                 'Replicating everything.')
        last_added_es_id = 0
    log.info('highest_db_id, highest_es_id: {}, {}'
             .format(last_added_pg_id, last_added_es_id))

    # Select all documents in between and replicate them to Elasticsearch.
    if last_added_pg_id > last_added_es_id:
        log.info('Replicating range {}-{}'
                 .format(last_added_es_id, last_added_pg_id))
        query = SQL('SELECT * FROM {} WHERE id BETWEEN {} AND {}'
                    ' ORDER BY id'
                    .format(table, last_added_es_id, last_added_pg_id))
        # Use the resolved destination so index creation also succeeds when
        # no explicit dest_idx was given.
        self.es.indices.create(index=destination,
                               body=create_mapping(table))
        self._replicate(table, destination, query)
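
# The range query above builds SQL with str.format, which is acceptable only
# because `table` and the id bounds come from trusted internal state. As a
# hedged sketch, the same query could instead be composed with psycopg2's SQL
# building blocks, which quote the identifier and bind the literals. The
# helper name `build_range_query` is illustrative and not part of the
# original module.
def build_range_query(table, lower_id, upper_id):
    from psycopg2.sql import SQL, Identifier, Literal
    return (SQL('SELECT * FROM {} WHERE id BETWEEN {} AND {} ORDER BY id')
            .format(Identifier(table),
                    Literal(lower_id),
                    Literal(upper_id)))
    # Usage: cursor.execute(build_range_query('image', 0, 1000))
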
def reindex(self, model_name: str, distributed=True):
    """
    Copy the contents of the database to a new Elasticsearch index. Create
    an index alias to make the new index the "live" index when finished.
    """
    suffix = uuid.uuid4().hex
    destination_index = model_name + '-' + suffix
    if distributed:
        self.es.indices.create(index=destination_index,
                               body=create_mapping(model_name))
        # In the distributed path, indexing is handed off to workers; the
        # alias flip is presumed to happen once they report completion.
        schedule_distributed_index(database_connect(), destination_index)
    else:
        self._index_table(model_name, dest_idx=destination_index)
        self.go_live(destination_index, model_name)
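
# `go_live` is called above but not defined in this excerpt. Below is a
# minimal sketch of the zero-downtime alias flip it is assumed to perform,
# using only documented elasticsearch-py index APIs; the method body is an
# assumption, not the project's actual implementation.
def go_live(self, write_index, live_alias):
    # Collect any indices the alias currently points at so they can be
    # detached in the same request that attaches the new index.
    actions = []
    if self.es.indices.exists_alias(name=live_alias):
        old_indices = self.es.indices.get_alias(name=live_alias).keys()
        actions += [{'remove': {'index': idx, 'alias': live_alias}}
                    for idx in old_indices]
    actions.append({'add': {'index': write_index, 'alias': live_alias}})
    # A single update_aliases call applies every action atomically, so
    # searches never observe a moment with no live index.
    self.es.indices.update_aliases(body={'actions': actions})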