Example #1
0
 def _index_table(self, table, dest_idx=None):
     """
     Check that the database tables are in sync with Elasticsearch. If not,
     begin replication.

     The highest id returned by ``get_last_item_ids(table)`` is compared with
     the highest ``id`` stored in the destination index. When the database
     is ahead, the rows in that id range are replicated to the index.

     :param table: Name of the database table to replicate.
     :param dest_idx: Name of the Elasticsearch index to write to. When
         omitted (or falsy), the index named after ``table`` is used.
     :return: None.
     """
     last_added_pg_id, _ = get_last_item_ids(table)
     if not last_added_pg_id:
         log.warning('Tried to sync ' + table + ' but it was empty.')
         return
     # Resolve the target index once and use it for every Elasticsearch
     # call below, so the default dest_idx=None never reaches the client.
     destination = dest_idx if dest_idx else table
     # Find the last document inserted into elasticsearch
     s = Search(using=self.es, index=destination)
     s.aggs.bucket('highest_pg_id', 'max', field='id')
     try:
         es_res = s.execute()
         last_added_es_id = \
             int(es_res.aggregations['highest_pg_id']['value'])
     except (TypeError, NotFoundError):
         # NotFoundError: the search failed because nothing was found
         # (presumably the index does not exist yet).
         # TypeError: presumably the aggregation value was None, i.e. the
         # index exists but holds no documents -- TODO confirm.
         log.info('No matching documents found in elasticsearch. '
                  'Replicating everything.')
         last_added_es_id = 0
     log.info('highest_db_id, highest_es_id: {}, {}'.format(
         last_added_pg_id, last_added_es_id))
     # Select all documents in-between and replicate to Elasticsearch.
     if last_added_pg_id > last_added_es_id:
         log.info('Replicating range ' + str(last_added_es_id) + '-' +
                  str(last_added_pg_id))
         # NOTE(review): `table` is interpolated into the SQL text as-is,
         # so it must come from a trusted source.
         query = SQL('SELECT * FROM {}'
                     ' WHERE id BETWEEN {} AND {} ORDER BY id'.format(
                         table, last_added_es_id, last_added_pg_id))
         # Only create the index when it is missing: an incremental sync
         # (or an existing but empty index) targets an index that is
         # already there, and creating it a second time would fail.
         if not self.es.indices.exists(index=destination):
             self.es.indices.create(index=destination,
                                    body=create_mapping(table))
         self._replicate(table, destination, query)
Example #2
0
 def reindex(self, model_name: str, distributed=True):
     """
     Copy contents of the database to a brand new Elasticsearch index.

     The new index is named ``<model_name>-<random hex>``.

     :param model_name: Name of the model (database table) to index.
     :param distributed: When true (the default), only create the new index
         with the model's mapping and schedule a distributed indexing job
         for it; this method does not promote the index in that mode.
         When false, index the table in-process through ``_index_table``
         and then call ``go_live`` to make the new index the "live" one.
     """
     destination_index = '-'.join((model_name, uuid.uuid4().hex))

     if not distributed:
         # Local path: replicate in this process, then promote the index.
         self._index_table(model_name, dest_idx=destination_index)
         self.go_live(destination_index, model_name)
         return

     # Distributed path: prepare the empty index and hand the work off.
     mapping = create_mapping(model_name)
     self.es.indices.create(index=destination_index, body=mapping)
     schedule_distributed_index(database_connect(), destination_index)