def make_app(debug=False):
    """Build the Tornado Application with its backing service clients.

    Reads connection settings from the environment ($ELASTICSEARCH_HOSTS,
    $REDIS_HOST, $LAZO_SERVER_HOST, $LAZO_SERVER_PORT), constructs the
    Elasticsearch, Redis, and Lazo clients, and registers the URL routes.

    :param debug: enable Tornado debug mode
    :return: the configured ``Application`` instance
    """
    # Service clients, built from environment configuration
    es = elasticsearch.Elasticsearch(
        os.environ['ELASTICSEARCH_HOSTS'].split(','),
    )
    redis_client = redis.Redis(host=os.environ['REDIS_HOST'])
    lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )

    routes = [
        URLSpec('/profile', Profile, name='profile'),
        URLSpec('/search', Search, name='search'),
        URLSpec('/download/([^/]+)', DownloadId, name='download_id'),
        URLSpec('/download', Download, name='download'),
        URLSpec('/metadata/([^/]+)', Metadata, name='metadata'),
        URLSpec('/augment', Augment, name='augment'),
        URLSpec('/upload', Upload, name='upload'),
        URLSpec('/statistics', Statistics, name='statistics'),
        URLSpec('/version', Version, name='version'),
        URLSpec('/health', Health, name='health'),
    ]
    return Application(
        routes,
        debug=debug,
        es=es,
        redis_client=redis_client,
        lazo=lazo_client,
    )
async def _run(self):
    """Connect to Elasticsearch, Lazo, and AMQP, then start the main loop.

    Stores the clients on ``self`` and launches the profiling main loop
    (and, when the subclass defines ``handle_query``, the query consumer)
    as background tasks whose failures are logged via ``log_future``.
    """
    self.elasticsearch = elasticsearch.Elasticsearch(
        os.environ['ELASTICSEARCH_HOSTS'].split(','),
    )
    self.lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )

    # AMQP connection and channel
    connection = await aio_pika.connect_robust(
        host=os.environ['AMQP_HOST'],
        port=int(os.environ['AMQP_PORT']),
        login=os.environ['AMQP_USER'],
        password=os.environ['AMQP_PASSWORD'],
    )
    self.channel = await connection.channel()
    # Deliver one message at a time to this consumer
    await self.channel.set_qos(prefetch_count=1)
    await self._amqp_setup()

    # Start the profiling process in the background
    log_future(self._call(self.main_loop), logger)

    # Subclasses providing handle_query also consume the query queue
    if hasattr(self, 'handle_query'):
        log_future(
            self.loop.create_task(self._consume_queries()),
            logger,
            should_never_exit=True,
        )
def clear(source):
    """Delete every indexed dataset that came from ``source``.

    Scans the 'datasets' and 'pending' indexes for documents whose
    materializer identifier or ``source`` field matches, and removes
    each one from both Elasticsearch and the Lazo index.
    """
    es = PrefixedElasticsearch()
    lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )

    # Either the materializer identifier or the source field may match
    query = {
        'query': {
            'bool': {
                'should': [
                    {'term': {'materialize.identifier': source}},
                    {'term': {'source': source}},
                ],
                'minimum_should_match': 1,
            },
        },
    }
    matches = es.scan(
        index='datasets,pending',
        query=query,
        _source=False,
        size=SIZE,
    )
    for hit in matches:
        delete_dataset_from_index(es, hit['_id'], lazo_client)
def delete(datasets):
    """Remove the given dataset identifiers from the index.

    :param datasets: iterable of dataset IDs to delete from both
        Elasticsearch and the Lazo index
    """
    es = PrefixedElasticsearch()
    lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )
    for dataset_id in datasets:
        delete_dataset_from_index(es, dataset_id, lazo_client)
def __init__(self):
    """Set up service clients, geo data, and the background run task.

    Builds the Elasticsearch and Lazo clients from the environment,
    optionally records a Nominatim URL, loads cached geo data, starts
    ``_run()`` on the event loop, and then waits for Elasticsearch to
    become reachable (up to 6 attempts, 5 seconds apart).
    """
    self.profile_semaphore = threading.Semaphore(MAX_CONCURRENT_PROFILE)
    self.es = elasticsearch.Elasticsearch(
        os.environ['ELASTICSEARCH_HOSTS'].split(','),
    )
    self.lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )

    # Nominatim is optional; without it, URLs are not resolved
    self.nominatim = os.environ.get('NOMINATIM_URL')
    if not self.nominatim:
        self.nominatim = None
        logger.warning("$NOMINATIM_URL is not set, not resolving URLs")

    self.geo_data = GeoData.from_local_cache()
    self.channel = None
    self.geo_data.load_areas([0, 1, 2], bounds=True)

    self.loop = asyncio.get_event_loop()
    log_future(
        self.loop.create_task(self._run()),
        logger,
        should_never_exit=True,
    )

    # The Elasticsearch container may not be up yet; poll before giving up
    max_attempts = 6
    for attempt in range(max_attempts):
        try:
            if not self.es.indices.exists('datamart'):
                raise RuntimeError("'datamart' index does not exist")
        except Exception:
            logger.warning("Can't connect to Elasticsearch, retrying...")
            if attempt == max_attempts - 1:
                raise
            time.sleep(5)
        else:
            break
def make_app(debug=False):
    """Build the Tornado Application (versioned API variant).

    Creates the Elasticsearch, Redis, and Lazo clients from environment
    variables, registers the version-1 API routes plus the unversioned
    health check, and installs a custom 404 handler.

    :param debug: enable Tornado debug mode
    :return: the configured ``Application`` instance
    """
    es = elasticsearch.Elasticsearch(
        os.environ['ELASTICSEARCH_HOSTS'].split(','),
    )
    redis_client = redis.Redis(host=os.environ['REDIS_HOST'])
    lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )

    # Version-1 API endpoints; /health stays unversioned
    routes = [
        ApiRule('/profile', '1', Profile),
        ApiRule('/search', '1', Search),
        ApiRule('/download/([^/]+)', '1', DownloadId),
        ApiRule('/download', '1', Download),
        ApiRule('/metadata/([^/]+)', '1', Metadata),
        ApiRule('/augment', '1', Augment),
        ApiRule('/augment/([^/]+)', '1', AugmentResult),
        ApiRule('/upload', '1', Upload),
        ApiRule('/session/new', '1', SessionNew),
        ApiRule('/session/([^/]+)', '1', SessionGet),
        ApiRule('/location', '1', LocationSearch),
        ApiRule('/statistics', '1', Statistics),
        ApiRule('/version', '1', Version),
        URLSpec('/health', Health),
    ]
    return Application(
        routes,
        debug=debug,
        es=es,
        redis_client=redis_client,
        lazo=lazo_client,
        default_handler_class=CustomErrorHandler,
        default_handler_args={"status_code": 404},
    )
async def import_all(folder):
    """Import dumped dataset and Lazo documents from ``folder``.

    Files named ``lazo.*`` are loaded into Lazo storage; all other files
    are treated as dataset documents and (re-)indexed in Elasticsearch.
    On a transport error, waits 10 seconds and retries the write once so
    a slow cluster can catch up. Progress is printed to stdout.
    """
    es = PrefixedElasticsearch()
    # The Lazo server is optional for this import
    if 'LAZO_SERVER_HOST' in os.environ:
        lazo_client = lazo_index_service.LazoIndexClient(
            host=os.environ['LAZO_SERVER_HOST'],
            port=int(os.environ['LAZO_SERVER_PORT']),
        )
    else:
        lazo_client = None

    # Partition the dump into dataset documents and Lazo documents
    dataset_docs, lazo_docs = [], []
    for name in os.listdir(folder):
        (lazo_docs if name.startswith('lazo.') else dataset_docs).append(name)

    for i, name in enumerate(dataset_docs):
        if i % 50 == 0:
            print(
                "\nImporting to Elasticsearch, %d/%d" % (i, len(dataset_docs)),
                flush=True,
            )
        with open(os.path.join(folder, name), 'r') as fp:
            obj = json.load(fp)
        dataset_id = decode_dataset_id(name)
        try:
            delete_dataset_from_index(es, dataset_id, lazo_client)
            add_dataset_to_index(es, dataset_id, obj)
        except elasticsearch.TransportError:
            print('X', end='', flush=True)
            # If writing can't keep up, needs a real break
            time.sleep(10)
            delete_dataset_from_index(es, dataset_id, lazo_client)
            add_dataset_to_index(es, dataset_id, obj)
        print('.', end='', flush=True)

    for i, name in enumerate(lazo_docs):
        if i % 500 == 0:
            print(
                "\nImporting to Lazo, %d/%d" % (i, len(lazo_docs)),
                flush=True,
            )
        with open(os.path.join(folder, name), 'r') as fp:
            obj = json.load(fp)
        # File name is 'lazo.<encoded dataset id>.<column>'
        dataset_id = decode_dataset_id(name[5:]).rsplit('.', 1)[0]
        lazo_es_id = obj.pop('_id')
        assert lazo_es_id.split('__.__')[0] == dataset_id
        try:
            add_dataset_to_lazo_storage(es, lazo_es_id, obj)
        except elasticsearch.TransportError:
            print('X', end='', flush=True)
            # If writing can't keep up, needs a real break
            time.sleep(10)
            add_dataset_to_lazo_storage(es, lazo_es_id, obj)
        if i % 10 == 0:
            print('.', end='', flush=True)
def delete(datasets):
    """Remove the given dataset identifiers from the index.

    :param datasets: iterable of dataset IDs to delete from both
        Elasticsearch and the Lazo index
    """
    es = elasticsearch.Elasticsearch(
        os.environ['ELASTICSEARCH_HOSTS'].split(','),
    )
    lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )
    for dataset_id in datasets:
        delete_dataset_from_index(es, dataset_id, lazo_client)
def clear(identifier):
    """Delete every indexed dataset materialized by ``identifier``.

    Scans the 'datamart' index for documents whose materializer
    identifier matches and removes each one from both Elasticsearch
    and the Lazo index.
    """
    es = elasticsearch.Elasticsearch(
        os.environ['ELASTICSEARCH_HOSTS'].split(','),
    )
    lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )
    matches = elasticsearch.helpers.scan(
        es,
        index='datamart',
        query={
            'query': {
                'term': {'materialize.identifier': identifier},
            },
        },
        _source=False,
        size=SIZE,
    )
    for hit in matches:
        delete_dataset_from_index(es, hit['_id'], lazo_client)
async def run(self):
    """Connect to Elasticsearch, Lazo, and AMQP, then run discovery.

    Any exception escaping ``discover_datasets`` is reported to Sentry,
    logged, and terminates the process with exit code 1.
    """
    self.elasticsearch = PrefixedElasticsearch()
    self.lazo_client = lazo_index_service.LazoIndexClient(
        host=os.environ['LAZO_SERVER_HOST'],
        port=int(os.environ['LAZO_SERVER_PORT']),
    )

    # AMQP connection and channel
    connection = await aio_pika.connect_robust(
        host=os.environ['AMQP_HOST'],
        port=int(os.environ['AMQP_PORT']),
        login=os.environ['AMQP_USER'],
        password=os.environ['AMQP_PASSWORD'],
    )
    self.channel = await connection.channel()
    # Deliver one message at a time to this consumer
    await self.channel.set_qos(prefetch_count=1)
    await self._amqp_setup()

    # Run the discovery process; a crash here is fatal for the service
    try:
        await self._call(self.discover_datasets)
    except Exception as e:
        sentry_sdk.capture_exception(e)
        logger.exception("Exception in discoverer %s", self.identifier)
        sys.exit(1)
async def import_all(folder):
    """Import dumped documents from ``folder``, announcing datasets on AMQP.

    Dataset documents (files not named ``lazo.*``) are (re-)indexed in
    Elasticsearch and then published on the 'datasets' topic exchange;
    ``lazo.*`` files are loaded into Lazo storage. On a transport error,
    waits 10 seconds and retries the write once so a slow cluster can
    catch up. Progress is printed to stdout.
    """
    es = elasticsearch.Elasticsearch(
        os.environ['ELASTICSEARCH_HOSTS'].split(','),
    )

    # AMQP setup: datasets are announced on a topic exchange
    amqp_conn = await aio_pika.connect_robust(
        host=os.environ['AMQP_HOST'],
        port=int(os.environ['AMQP_PORT']),
        login=os.environ['AMQP_USER'],
        password=os.environ['AMQP_PASSWORD'],
    )
    amqp_chan = await amqp_conn.channel()
    amqp_datasets_exchange = await amqp_chan.declare_exchange(
        'datasets',
        aio_pika.ExchangeType.TOPIC,
    )

    # The Lazo server is optional for this import
    if 'LAZO_SERVER_HOST' in os.environ:
        lazo_client = lazo_index_service.LazoIndexClient(
            host=os.environ['LAZO_SERVER_HOST'],
            port=int(os.environ['LAZO_SERVER_PORT']),
        )
    else:
        lazo_client = None

    print("Importing Elasticsearch data", end='', flush=True)
    for name in os.listdir(folder):
        if name.startswith('lazo.'):
            continue
        with open(os.path.join(folder, name), 'r') as fp:
            obj = json.load(fp)
        dataset_id = decode_dataset_id(name)
        try:
            delete_dataset_from_index(es, dataset_id, lazo_client)
            add_dataset_to_index(es, dataset_id, obj)
        except elasticsearch.TransportError:
            print('X', end='', flush=True)
            # If writing can't keep up, needs a real break
            time.sleep(10)
            delete_dataset_from_index(es, dataset_id, lazo_client)
            add_dataset_to_index(es, dataset_id, obj)
        # Announce the (re-)indexed dataset
        await amqp_datasets_exchange.publish(
            json2msg(dict(obj, id=dataset_id)),
            dataset_id,
        )
        print('.', end='', flush=True)

    print("Importing Lazo data", end='', flush=True)
    for name in os.listdir(folder):
        if not name.startswith('lazo.'):
            continue
        with open(os.path.join(folder, name), 'r') as fp:
            obj = json.load(fp)
        # File name is 'lazo.<encoded dataset id>.<column>'
        dataset_id = decode_dataset_id(name[5:]).rsplit('.', 1)[0]
        lazo_es_id = obj.pop('_id')
        assert lazo_es_id.split('__.__')[0] == dataset_id
        try:
            add_dataset_to_lazo_storage(es, lazo_es_id, obj)
        except elasticsearch.TransportError:
            print('X', end='', flush=True)
            # If writing can't keep up, needs a real break
            time.sleep(10)
            add_dataset_to_lazo_storage(es, lazo_es_id, obj)
        print('.', end='', flush=True)