def import_populate_db():
    """Ensure the ES index exists with its mapping and bulk-load ``pdvs``.

    Returns:
        tuple: ``(existing_docs, 0)`` when the index already holds documents,
        otherwise ``(errors, inserted)`` where ``errors`` is a list of
        ``{'id': ..., 'description': ...}`` dicts for failed documents and
        ``inserted`` is the number of successfully indexed ones.
    """
    es_host = app.config['ELASTICSEARCH']
    es_idx = app.config['ES_IDX']
    es = Elasticsearch(f'{es_host}:9200')
    idx = IndicesClient(es)

    if idx.exists(index=es_idx):
        existing_docs = es.count(index=es_idx).get('count')
        if existing_docs:
            # Index already populated -- nothing to import.
            return existing_docs, 0
        app.logger.info("Drop and creating index and mapping definition")
        idx.delete(es_idx)
        # BUG FIX: recreate the index with the mapping after dropping it.
        # Previously the create only happened in the else branch, so an
        # empty existing index was deleted and then implicitly recreated
        # WITHOUT the mapping by the first es.index() call below.
        idx.create(index=es_idx, body=mapping)
    else:
        idx.create(index=es_idx, body=mapping)

    app.logger.info("Populating elasticsearch with documents")
    errors = []
    for pdv in pdvs:
        # BUG FIX: capture the source id before it is removed -- the original
        # code reported pdv.get('id') after `del pdv['id']`, which was
        # always None in the error entries.
        source_id = pdv.get('id')
        # Use the digits of the document number as the ES document id.
        document_id = ''.join(filter(str.isdigit, pdv.get('document')))
        pdv['document'] = document_id
        del pdv['id']
        try:
            es.index(index=es_idx, body=pdv, id=document_id)
        except Exception as ex:
            app.logger.exception(ex)
            errors.append({'id': source_id, 'description': ex.args})
    inserted = len(pdvs) - len(errors)
    return errors, inserted
class ElasticIndiceDriver:
    """Thin convenience wrapper over IndicesClient for index lifecycle calls."""

    def __init__(self, client: Elasticsearch):
        # Wrap the raw transport client with the indices namespace client.
        self.client = IndicesClient(client)

    def create_index(self, index: str, mapping: dict):
        """Create *index* with *mapping* serialized to a JSON body."""
        body = json.dumps(mapping)
        self.client.create(index, body)

    def clean_index(self, index: str):
        """Delete *index* and its ``<index>-finished`` companion index."""
        for name in (index, f'{index}-finished'):
            self.client.delete(name)
def create(cls, user, **kwargs):
    """Create the per-user Elasticsearch index.

    Args:
        user: object exposing ``user_id``; used as the index name.
        **kwargs: pass ``delete_existing=True`` to drop a pre-existing
            index before recreating it.

    Returns:
        bool: True when the index was created, False when it already
        existed and ``delete_existing`` was not requested.
    """
    client = Elasticsearch(cls.__url__)
    indice = IndicesClient(client)
    if indice.exists(index=user.user_id):
        # kwargs.get() collapses the `'k' in kwargs and kwargs['k']` idiom.
        if kwargs.get('delete_existing'):
            # logging's warn() is deprecated; warning() is the supported API.
            log.warning('Deleting existing index for user %s', user.user_id)
            indice.delete(index=user.user_id)
        else:
            log.warning('Index already exists for user %s', user.user_id)
            return False
    log.info('Creating index for user %s', user.user_id)
    indice.create(index=user.user_id)
    return True
def create(cls, user, **kwargs):
    """Create the per-user Elasticsearch index.

    Args:
        user: object exposing ``user_id``; used as the index name.
        **kwargs: pass ``delete_existing=True`` to drop a pre-existing
            index before recreating it.

    Returns:
        bool: True when the index was created, False when it already
        existed and ``delete_existing`` was not requested.
    """
    client = Elasticsearch(cls.__url__)
    indice = IndicesClient(client)
    if indice.exists(index=user.user_id):
        # kwargs.get() collapses the `'k' in kwargs and kwargs['k']` idiom.
        if kwargs.get('delete_existing'):
            # logging's warn() is deprecated; warning() is the supported API.
            log.warning('Deleting existing index for user %s', user.user_id)
            indice.delete(index=user.user_id)
        else:
            log.warning('Index already exists for user %s', user.user_id)
            return False
    log.info('Creating index for user %s', user.user_id)
    indice.create(index=user.user_id)
    return True
def recreate_index_model(self, model: Union[type[Gallery], type[Archive]]):
    """Drop and rebuild the ES index for *model*, then apply its settings and mapping."""
    from elasticsearch.client.indices import IndicesClient

    indices = IndicesClient(client=self.es_client)
    target = model._meta.es_index_name  # type: ignore

    if indices.exists(target):
        indices.delete(index=target)
    indices.create(index=target)

    # The analysis settings can only be updated while the index is closed.
    indices.close(index=target)
    edge_ngram_filter = {
        "type": "edge_ngram",
        "min_gram": 2,
        "max_gram": 20,
    }
    edge_ngram_analyzer = {
        "type": "custom",
        "tokenizer": "standard",
        "filter": ["lowercase", "edge_ngram_filter"],
    }
    indices.put_settings(
        index=target,
        body={
            "index": {"max_result_window": settings.MAX_RESULT_WINDOW},
            "analysis": {
                "filter": {"edge_ngram_filter": edge_ngram_filter},
                "analyzer": {"edge_ngram_analyzer": edge_ngram_analyzer},
            },
        },
    )
    indices.put_mapping(
        body=model._meta.es_mapping,  # type: ignore
        index=target,
    )
    indices.open(index=target)
def initialize_elastic_search() -> Tuple[Elasticsearch, IndicesClient]:
    """Connect to the local ES node and ensure INDEX exists with its mapping.

    Returns:
        Tuple[Elasticsearch, IndicesClient]: the transport client and its
        indices namespace client, ready for use.

    Raises:
        RequestError: for any request failure other than the index already
        existing (e.g. a malformed mapping).
    """
    elastic_search = Elasticsearch(hosts=[{"host": "localhost", "port": 9200}])
    indices_client = IndicesClient(client=elastic_search)
    try:
        indices_client.create(
            index=INDEX,
            body={
                "mappings": {
                    "properties": {
                        "doc": {"type": "text"},
                        # 768-dim dense vectors (BERT-sized embeddings --
                        # TODO confirm against the embedding producer).
                        "vector": {"type": "dense_vector", "dims": 768},
                    }
                }
            },
        )
    except RequestError as ex:
        # Only tolerate "index already exists"; the original bare `pass`
        # also swallowed genuine errors such as an invalid mapping.
        if ex.error != 'resource_already_exists_exception':
            raise
    return elastic_search, indices_client
def create_index(esconn, index_name, data_file, shard_count):
    """Create *index_name* from the JSON settings/mappings in *data_file*.

    Args:
        esconn: connected Elasticsearch client.
        index_name: name of the index to create.
        data_file: path to a JSON file with "settings" (and mappings).
        shard_count: number of primary shards (data nodes x 2).

    Raises:
        ES_PIPELINE_ERROR: wraps any failure (I/O, JSON parsing, or the
        cluster rejecting the create request).
    """
    index = IndicesClient(esconn)
    try:
        # Context manager fixes the file-handle leak in the original code,
        # which opened data_file and never closed it.
        with open(data_file) as index_json:
            json_body = json.loads(index_json.read())

        # Work out number of shards == no. of data nodes x 2
        print("Setting Index Shard Count to: " + str(shard_count))
        json_body["settings"]["index"]["number_of_shards"] = shard_count

        # For single node clusters (shard_count will be 2) - no replicas possible
        if shard_count == 2:
            print("Single node cluster detected - disabling replicas")
            json_body["settings"]["index"]["number_of_replicas"] = 0

        # Create Index and apply any settings & mappings
        idx = index.create(index=index_name, body=json_body)
        if not idx['acknowledged']:
            raise ES_INDEX_ERROR('Failed to create Index. Response: ', idx)
        print("SUCCESS: Created Index: " + index_name)
    except Exception as ex:
        raise ES_PIPELINE_ERROR(ex)
def main():
    # Load GTFS data from disk and index stops, shapes, and stop_times into
    # Elasticsearch, denormalizing route/trip/stop data onto each document.
    c_parser = configparser.ConfigParser()
    c_parser.read("config.ini")
    es_config = c_parser["ELASTIC"]
    gtfs_config = c_parser["GTFS"]
    gtfs_path = gtfs_config["gtfs_path"]
    # All three indices share a configurable prefix.
    index_prefix = es_config["index_prefix"]
    stops_index = index_prefix + "_stops"
    shapes_index = index_prefix + "_shapes"
    stop_times_index = index_prefix + "_stop_times"
    es = Elasticsearch(
        host=es_config["host"],
        scheme=es_config["scheme"],
        port=es_config.getint("port"),
        http_auth=(es_config["username"], es_config["password"]),
        use_ssl=es_config.getboolean("use_ssl"),
        verify_certs=es_config.getboolean("verify_certs"),
        ca_certs=certifi.where())
    # Mapping bodies are read as raw JSON strings and passed straight to ES.
    with open("mappings/shapes.json", 'r') as shapes_mapping_file:
        shapes_mapping = shapes_mapping_file.read()
    with open("mappings/stops.json", 'r') as stops_mapping_file:
        stops_mapping = stops_mapping_file.read()
    with open("mappings/stop_times.json", 'r') as stop_times_file:
        stop_times_mapping = stop_times_file.read()
    indices = IndicesClient(es)
    # NOTE(review): create() raises if an index already exists -- this
    # appears to assume a fresh cluster; confirm intended re-run behavior.
    indices.create(stops_index, body=stops_mapping)
    indices.create(shapes_index, body=shapes_mapping)
    indices.create(stop_times_index, body=stop_times_mapping)
    all_stops = gather_stops(gtfs_path)
    # parallel_bulk yields (ok, item) per action; only failures are printed.
    for ok, item in parallel_bulk(es, genbulkactions(stops_index, all_stops.values()),
                                  chunk_size=500):
        if not ok:
            print(item)
    print("Done with stops")
    all_shapes = gather_shapes(gtfs_path)
    all_trips = gather_trips(gtfs_path)
    all_routes = gather_routes(gtfs_path)
    # Attach the owning route to each shape; drop sequence bookkeeping fields.
    shapes_to_route = shape_to_route_dict(all_trips.values(), all_routes)
    for shape_id in shapes_to_route.keys():
        all_shapes[shape_id]['route'] = shapes_to_route[shape_id]
        all_shapes[shape_id].pop('start_seq', None)
        all_shapes[shape_id].pop('finish_seq', None)
    for ok, item in parallel_bulk(es, genbulkactions(shapes_index, all_shapes.values()),
                                  chunk_size=500):
        if not ok:
            print(item)
    print("Done with shapes")
    # Denormalize: replace route_id on each trip with the full route record.
    # NOTE(review): assumes all_routes is keyed by int route ids -- verify
    # against gather_routes.
    for trip in all_trips.values():
        route_id = trip.pop("route_id", None)
        if route_id:
            trip['route'] = all_routes[int(route_id)]
    all_stop_times = gather_stop_times(gtfs_path)
    # Denormalize trip and stop records onto each stop_time document.
    for stop_time in all_stop_times:
        trip_id = stop_time.pop("trip_id", None)
        stop_id = stop_time.pop("stop_id", None)
        if trip_id:
            stop_time['trip'] = all_trips[int(trip_id)]
        if stop_id:
            stop_time['stop'] = all_stops[int(stop_id)]
    # Larger chunk size here: stop_times is typically the biggest dataset.
    for ok, item in parallel_bulk(es, genbulkactions(stop_times_index, all_stop_times),
                                  chunk_size=1000):
        if not ok:
            print(item)
    print("Done with stop times")