def sync_es(session, batch_size=1000):
    """Bring the ElasticSearch index up to date with the database.

    Reads the last update time and the current date from
    ``es_sync.get_status``, re-syncs every document reported as changed
    by the ``get_*`` helpers, then syncs the documents reported as
    deleted, and finally stores the current date as the new update time.

    :param session: database session handed to every helper.
    :param batch_size: forwarded to ``sync_documents`` and
        ``sync_deleted_documents``.
    :raises Exception: if no last update time is available, i.e. the
        index has to be filled with ``fill_index`` first.
    """
    last_update, date_now = es_sync.get_status(session)
    log.info('Last update time: {}'.format(last_update))

    if not last_update:
        raise Exception(
            'No last update time, run `fill_index` to do an '
            'initial import into ElasticSearch')

    # collect the changes of every source, in a fixed order, and
    # concatenate them into one list
    from_documents = get_changed_documents(session, last_update)
    from_users = get_changed_users(session, last_update)
    from_associations = get_changed_documents_for_associations(
        session, last_update)
    from_deleted_locales = get_deleted_locale_documents(
        session, last_update)
    from_tags = get_tagged_documents(session, last_update)
    changed_documents = (
        from_documents + from_users + from_associations +
        from_deleted_locales + from_tags)

    log.info(
        'Number of changed documents: {}'.format(len(changed_documents)))
    if changed_documents:
        sync_documents(session, changed_documents, batch_size)

    # deletions are looked up only after the changed documents were synced
    deleted_documents = get_deleted_documents(session, last_update)
    log.info(
        'Number of deleted documents: {}'.format(len(deleted_documents)))
    if deleted_documents:
        sync_deleted_documents(session, deleted_documents, batch_size)

    es_sync.mark_as_updated(session, date_now)
    log.info('Sync has finished')
def fill_index(session, batch_size=1000):
    """Import all documents into the ElasticSearch index.

    Iterates over every entry of ``document_types``, converts each
    document returned by ``sync.get_documents`` with the matching
    ``to_search_document`` function and adds it to an ``ElasticBatch``.
    Progress is printed at most about once per second. Afterwards the
    date obtained from ``es_sync.get_status`` is stored as update time
    and the total duration is printed.

    :param session: database session used for all queries.
    :param batch_size: passed to ``ElasticBatch`` and
        ``sync.get_documents``.
    """
    client = elasticsearch_config['client']
    index_name = elasticsearch_config['index']

    # mutable holder so that the nested function can update the timestamp
    status = {'start_time': datetime.now(), 'last_progress_update': None}

    _, date_now = es_sync.get_status(session)

    # documents that redirect to another document are not counted
    not_redirected = Document.redirects_to.is_(None)
    total = session.query(Document).filter(not_redirected).count()

    def progress(count, total_count):
        previous = status['last_progress_update']
        # skip the output if the last message is not older than a second
        if previous is not None and \
                previous + timedelta(seconds=1) >= datetime.now():
            return
        print('{0} of {1}'.format(count, total_count))
        status['last_progress_update'] = datetime.now()

    batch = ElasticBatch(client, batch_size)
    count = 0
    with batch:
        for doc_type in document_types:
            print('Importing document type {}'.format(doc_type))
            convert = search_documents[doc_type].to_search_document

            documents = sync.get_documents(session, doc_type, batch_size)
            for document in documents:
                batch.add(convert(document, index_name))
                count += 1
                progress(count, total)

    es_sync.mark_as_updated(session, date_now)

    elapsed = datetime.now() - status['start_time']
    print('Done (duration: {0})'.format(elapsed))
def sync_es(session):
    """Sync the documents changed since the last update to ElasticSearch.

    The last update time and the current date come from
    ``es_sync.get_status``. Documents reported by
    ``get_changed_documents`` and ``get_changed_users`` are passed to
    ``sync_documents``; afterwards the current date is stored as the new
    update time.

    :param session: database session handed to every helper.
    :raises Exception: if no last update time is available, i.e. the
        index has to be filled with ``fill_index`` first.
    """
    last_update, date_now = es_sync.get_status(session)

    if not last_update:
        raise Exception(
            'No last update time, run `fill_index` to do an '
            'initial import into ElasticSearch')

    # TODO also check changes to associations
    from_documents = get_changed_documents(session, last_update)
    from_users = get_changed_users(session, last_update)
    changed_documents = from_documents + from_users

    if changed_documents:
        sync_documents(session, changed_documents)

    es_sync.mark_as_updated(session, date_now)
def sync_es(session, batch_size=1000):
    """Sync the documents changed since the last update to ElasticSearch.

    The last update time and the current date come from
    ``es_sync.get_status``. Documents reported by
    ``get_changed_documents``, ``get_changed_users`` and
    ``get_changed_documents_for_associations`` are passed to
    ``sync_documents``; afterwards the current date is stored as the new
    update time.

    :param session: database session handed to every helper.
    :param batch_size: forwarded to ``sync_documents``.
    :raises Exception: if no last update time is available, i.e. the
        index has to be filled with ``fill_index`` first.
    """
    last_update, date_now = es_sync.get_status(session)
    log.info('Last update time: {}'.format(last_update))

    if not last_update:
        raise Exception(
            'No last update time, run `fill_index` to do an '
            'initial import into ElasticSearch')

    # collect the changes of every source and concatenate them
    from_documents = get_changed_documents(session, last_update)
    from_users = get_changed_users(session, last_update)
    from_associations = get_changed_documents_for_associations(
        session, last_update)
    changed_documents = from_documents + from_users + from_associations

    log.info(
        'Number of changed documents: {}'.format(len(changed_documents)))
    if changed_documents:
        sync_documents(session, changed_documents, batch_size)

    es_sync.mark_as_updated(session, date_now)
    log.info('Sync has finished')
def fill_index(session, batch_size=1000):
    """Import all documents into the ElasticSearch index.

    Iterates over every entry of ``document_types``, converts each
    document returned by ``sync.get_documents`` with the matching
    ``to_search_document`` function and adds it to an ``ElasticBatch``.
    Progress is printed at most about once per second. Afterwards the
    date obtained from ``es_sync.get_status`` is stored as update time
    and the total duration is printed.

    :param session: database session used for all queries.
    :param batch_size: passed to ``ElasticBatch``. It is an explicit
        keyword parameter so that the function does not depend on a
        ``batch_size`` name being defined outside of it; existing
        ``fill_index(session)`` calls keep working.
    """
    client = elasticsearch_config['client']
    index_name = elasticsearch_config['index']

    # mutable holder so that the nested function can update the timestamp
    status = {
        'start_time': datetime.now(),
        'last_progress_update': None
    }

    _, date_now = es_sync.get_status(session)

    # documents that redirect to another document are not counted
    total = session.query(Document). \
        filter(Document.redirects_to.is_(None)).count()

    def progress(count, total_count):
        # print only if the last message is older than a second
        if status['last_progress_update'] is None or \
                status['last_progress_update'] + timedelta(seconds=1) < \
                datetime.now():
            print('{0} of {1}'.format(count, total_count))
            status['last_progress_update'] = datetime.now()

    batch = ElasticBatch(client, batch_size)
    count = 0
    with batch:
        for doc_type in document_types:
            print('Importing document type {}'.format(doc_type))
            to_search_document = search_documents[doc_type].to_search_document

            for doc in sync.get_documents(session, doc_type):
                batch.add(to_search_document(doc, index_name))

                count += 1
                progress(count, total)

    es_sync.mark_as_updated(session, date_now)

    duration = datetime.now() - status['start_time']
    print('Done (duration: {0})'.format(duration))
def test_mark_as_updated(self):
    """The date passed to `mark_as_updated` is afterwards returned by
    `get_status` as the last update time.
    """
    _, current_date = get_status(self.session)

    mark_as_updated(self.session, current_date)

    stored_date, _ = get_status(self.session)
    self.assertEqual(stored_date, current_date)
def _add_test_data(self):
    """Create the fixture documents and associations for the tests.

    Stores the date returned by ``es_sync.get_status`` as update time,
    then adds three waypoints (the third one redirecting to the first),
    a route and an outing to the session, creates a version for each of
    them and finally links them (and the contributor profile) with
    associations and their logs.
    """
    _, sync_date = es_sync.get_status(self.session)
    es_sync.mark_as_updated(self.session, sync_date)

    # all waypoints share the same coordinates
    summit_point = 'SRID=3857;POINT(635956 5723604)'

    granier_locales = [
        WaypointLocale(
            lang='fr', title='Mont Granier', description='...',
            summary='Le Mont [b]Granier[/b]'),
        WaypointLocale(
            lang='en', title='Mont Granier', description='...',
            summary='The Mont Granier'),
    ]
    self.waypoint1 = Waypoint(
        document_id=71171, waypoint_type='summit', elevation=2000,
        geometry=DocumentGeometry(geom=summit_point),
        locales=granier_locales)

    mont_blanc_locales = [
        WaypointLocale(
            lang='en', title='Mont Blanc', description='...',
            summary='The heighest point in Europe'),
    ]
    self.waypoint2 = Waypoint(
        document_id=71172, waypoint_type='summit', elevation=4985,
        geometry=DocumentGeometry(geom=summit_point),
        locales=mont_blanc_locales)

    route_locales = [
        RouteLocale(
            lang='en', title='Face N', description='...',
            gear='paraglider', title_prefix='Mont Blanc'),
    ]
    self.route1 = Route(
        document_id=71173, activities=['skitouring'],
        elevation_max=1500, elevation_min=700,
        height_diff_up=800, height_diff_down=800, durations='1',
        locales=route_locales)

    # waypoint that only redirects to `waypoint1`
    redirected_locales = [
        WaypointLocale(
            lang='en', title='Mont Blanc', description='...',
            summary='The heighest point in Europe'),
    ]
    self.waypoint3 = Waypoint(
        document_id=71174, redirects_to=71171,
        waypoint_type='summit', elevation=4985,
        geometry=DocumentGeometry(geom=summit_point),
        locales=redirected_locales)

    outing_locales = [
        OutingLocale(
            lang='en', title='...', description='...', weather='sunny'),
    ]
    self.outing1 = Outing(
        activities=['skitouring'],
        date_start=datetime.date(2016, 1, 1),
        date_end=datetime.date(2016, 1, 1),
        locales=outing_locales)

    documents = [
        self.waypoint1, self.waypoint2, self.waypoint3,
        self.route1, self.outing1,
    ]
    self.session.add_all(documents)
    self.session.flush()

    user_id = self.global_userids['contributor']
    versioned = (
        self.waypoint1, self.waypoint2, self.waypoint3,
        self.route1, self.outing1)
    for document in versioned:
        DocumentRest.create_new_version(document, user_id)

    association_wr = Association.create(self.waypoint1, self.route1)
    association_ww = Association.create(self.waypoint2, self.waypoint1)
    association_ro = Association.create(self.route1, self.outing1)
    user = self.session.query(UserProfile).get(
        self.global_userids['contributor'])
    association_uo = Association.create(user, self.outing1)

    associations = [
        association_wr, association_ww, association_ro, association_uo]
    association_logs = [
        association.get_log(user_id) for association in associations]
    self.session.add_all(associations + association_logs)
    self.session.flush()
def _add_test_data(self):
    """Create the fixture documents for the tests.

    Stores the date returned by ``es_sync.get_status`` as update time,
    then adds three waypoints (the third one redirecting to the first)
    and a route to the session and creates a version for each of them.
    """
    _, sync_date = es_sync.get_status(self.session)
    es_sync.mark_as_updated(self.session, sync_date)

    def summit_geometry():
        # every waypoint gets its own geometry object, same coordinates
        return DocumentGeometry(geom='SRID=3857;POINT(635956 5723604)')

    def mont_blanc_locale():
        return WaypointLocale(
            lang='en', title='Mont Blanc', description='...',
            summary='The heighest point in Europe')

    self.waypoint1 = Waypoint(
        document_id=71171,
        waypoint_type='summit',
        elevation=2000,
        geometry=summit_geometry(),
        locales=[
            WaypointLocale(
                lang='fr', title='Mont Granier', description='...',
                summary='Le Mont [b]Granier[/b]'),
            WaypointLocale(
                lang='en', title='Mont Granier', description='...',
                summary='The Mont Granier'),
        ])

    self.waypoint2 = Waypoint(
        document_id=71172,
        waypoint_type='summit',
        elevation=4985,
        geometry=summit_geometry(),
        locales=[mont_blanc_locale()])

    self.route1 = Route(
        document_id=71173,
        activities=['skitouring'],
        elevation_max=1500,
        elevation_min=700,
        height_diff_up=800,
        height_diff_down=800,
        durations='1',
        locales=[
            RouteLocale(
                lang='en', title='Face N', description='...',
                gear='paraglider', title_prefix='Mont Blanc'),
        ])

    # waypoint that only redirects to `waypoint1`
    self.waypoint3 = Waypoint(
        document_id=71174,
        redirects_to=71171,
        waypoint_type='summit',
        elevation=4985,
        geometry=summit_geometry(),
        locales=[mont_blanc_locale()])

    documents = [
        self.waypoint1, self.waypoint2, self.waypoint3, self.route1]
    self.session.add_all(documents)
    self.session.flush()

    user_id = self.global_userids['contributor']
    versioned = (
        self.waypoint1, self.waypoint2, self.waypoint3, self.route1)
    for document in versioned:
        DocumentRest.create_new_version(document, user_id)
def _add_test_data(self):
    """Create the fixture documents and associations for the tests.

    Stores the date returned by ``es_sync.get_status`` as update time,
    then adds three waypoints (the third one redirecting to the first),
    a route and an outing to the session, creates a version for each of
    them and finally links them (and the contributor profile) with
    associations and their logs.
    """
    _, sync_date = es_sync.get_status(self.session)
    es_sync.mark_as_updated(self.session, sync_date)

    # attributes of the first waypoint
    granier = dict(
        document_id=71171,
        waypoint_type='summit',
        elevation=2000,
        geometry=DocumentGeometry(geom='SRID=3857;POINT(635956 5723604)'),
        locales=[
            WaypointLocale(
                lang='fr', title='Mont Granier', description='...',
                summary='Le Mont [b]Granier[/b]'),
            WaypointLocale(
                lang='en', title='Mont Granier', description='...',
                summary='The Mont Granier'),
        ])
    self.waypoint1 = Waypoint(**granier)

    # attributes of the second waypoint
    mont_blanc = dict(
        document_id=71172,
        waypoint_type='summit',
        elevation=4985,
        geometry=DocumentGeometry(geom='SRID=3857;POINT(635956 5723604)'),
        locales=[
            WaypointLocale(
                lang='en', title='Mont Blanc', description='...',
                summary='The heighest point in Europe'),
        ])
    self.waypoint2 = Waypoint(**mont_blanc)

    face_n = dict(
        document_id=71173,
        activities=['skitouring'],
        elevation_max=1500,
        elevation_min=700,
        height_diff_up=800,
        height_diff_down=800,
        durations='1',
        locales=[
            RouteLocale(
                lang='en', title='Face N', description='...',
                gear='paraglider', title_prefix='Mont Blanc'),
        ])
    self.route1 = Route(**face_n)

    # the third waypoint only redirects to the first one
    redirected = dict(
        document_id=71174,
        redirects_to=71171,
        waypoint_type='summit',
        elevation=4985,
        geometry=DocumentGeometry(geom='SRID=3857;POINT(635956 5723604)'),
        locales=[
            WaypointLocale(
                lang='en', title='Mont Blanc', description='...',
                summary='The heighest point in Europe'),
        ])
    self.waypoint3 = Waypoint(**redirected)

    ski_trip = dict(
        activities=['skitouring'],
        date_start=datetime.date(2016, 1, 1),
        date_end=datetime.date(2016, 1, 1),
        locales=[
            OutingLocale(
                lang='en', title='...', description='...',
                weather='sunny'),
        ])
    self.outing1 = Outing(**ski_trip)

    self.session.add_all([
        self.waypoint1,
        self.waypoint2,
        self.waypoint3,
        self.route1,
        self.outing1,
    ])
    self.session.flush()

    user_id = self.global_userids['contributor']
    for document in [self.waypoint1, self.waypoint2, self.waypoint3,
                     self.route1, self.outing1]:
        DocumentRest.create_new_version(document, user_id)

    association_wr = Association.create(self.waypoint1, self.route1)
    association_ww = Association.create(self.waypoint2, self.waypoint1)
    association_ro = Association.create(self.route1, self.outing1)
    user = self.session.query(UserProfile).get(
        self.global_userids['contributor'])
    association_uo = Association.create(user, self.outing1)

    # the associations are added first, followed by one log entry each
    to_add = [
        association_wr, association_ww, association_ro, association_uo]
    to_add = to_add + [
        association_wr.get_log(user_id),
        association_ww.get_log(user_id),
        association_ro.get_log(user_id),
        association_uo.get_log(user_id),
    ]
    self.session.add_all(to_add)
    self.session.flush()
def _add_test_data(self):
    """Create the fixture documents for the tests.

    Stores the date returned by ``es_sync.get_status`` as update time,
    then adds three waypoints (the third one redirecting to the first)
    and a route to the session and creates a version for each of them.
    """
    _, sync_date = es_sync.get_status(self.session)
    es_sync.mark_as_updated(self.session, sync_date)

    # all waypoints share the same coordinates
    summit_point = 'SRID=3857;POINT(635956 5723604)'

    waypoint1 = Waypoint(
        document_id=71171, waypoint_type='summit', elevation=2000,
        geometry=DocumentGeometry(geom=summit_point),
        locales=[
            WaypointLocale(
                lang='fr', title='Mont Granier', description='...',
                summary='Le Mont [b]Granier[/b]'),
            WaypointLocale(
                lang='en', title='Mont Granier', description='...',
                summary='The Mont Granier'),
        ])
    self.waypoint1 = waypoint1

    waypoint2 = Waypoint(
        document_id=71172, waypoint_type='summit', elevation=4985,
        geometry=DocumentGeometry(geom=summit_point),
        locales=[
            WaypointLocale(
                lang='en', title='Mont Blanc', description='...',
                summary='The heighest point in Europe'),
        ])
    self.waypoint2 = waypoint2

    route1 = Route(
        document_id=71173, activities=['skitouring'],
        elevation_max=1500, elevation_min=700,
        height_diff_up=800, height_diff_down=800, durations='1',
        locales=[
            RouteLocale(
                lang='en', title='Face N', description='...',
                gear='paraglider', title_prefix='Mont Blanc'),
        ])
    self.route1 = route1

    # waypoint that only redirects to `waypoint1`
    waypoint3 = Waypoint(
        document_id=71174, redirects_to=71171,
        waypoint_type='summit', elevation=4985,
        geometry=DocumentGeometry(geom=summit_point),
        locales=[
            WaypointLocale(
                lang='en', title='Mont Blanc', description='...',
                summary='The heighest point in Europe'),
        ])
    self.waypoint3 = waypoint3

    self.session.add_all([waypoint1, waypoint2, waypoint3, route1])
    self.session.flush()

    user_id = self.global_userids['contributor']
    for document in (waypoint1, waypoint2, waypoint3, route1):
        DocumentRest.create_new_version(document, user_id)