def elastic_bulk_save(process_name, index, doc_type, records, ids=None, parents=None, retry=True):
    """Bulk-index *records* into Elasticsearch, falling back to per-record saves.

    :param process_name: label used for logging/exception context.
    :param index: target index name.
    :param doc_type: mapping (document type) name.
    :param records: list of document bodies.
    :param ids: optional document ids, parallel to *records*; None lets ES assign.
    :param parents: optional parent ids, parallel to *records*.
    :param retry: when True, a failed bulk request is retried one record at a
        time so a single bad document does not discard the whole batch.
    :raises ElasticException: when a record still fails on the per-record retry.
    """
    if ids is None:
        ids = [None] * len(records)
    if parents is None:
        parents = [None] * len(records)
    try:
        wait_for_yellow_cluster_status(process_name)
        es_helpers.bulk(
            es,
            ({'_index': index,
              '_id': idx,
              '_parent': parent,
              '_type': doc_type,
              '_source': r}
             for (r, idx, parent) in zip(records, ids, parents)))
    except Exception as e:
        # Retry record by record, keeping each record's parent id — the
        # previous retry path dropped parents, silently breaking
        # parent/child documents on the second attempt.
        for (r, idx, parent) in zip(records, ids, parents):
            if retry:
                elastic_bulk_save(process_name, index, doc_type,
                                  [r], [idx], [parent], retry=False)
            else:
                raise ElasticException(process_name, 'Error saving to Elastic',
                                       actionable_info=str(r), cause=str(e))
def elastic_update(process_name, index, doc_type, record, id):
    """Apply a partial update to one Elasticsearch document.

    Waits for a non-red cluster before updating; any failure is wrapped in
    an ElasticException carrying the offending record as actionable info.
    """
    try:
        wait_for_yellow_cluster_status(process_name)
        es.update(index=index, doc_type=doc_type, id=id, body=record)
    except Exception as error:
        message = 'Error updating {} in Elastic'.format(id)
        raise ElasticException(process_name, message,
                               actionable_info=str(record), cause=str(error))
def index_crops_in_elastic(crop_id, query):
    """Index a crop document under *crop_id* in the crops index."""
    try:
        es.index(index=crop_index, doc_type=crop_mapping,
                 id=crop_id, body=query)
    except ElasticsearchException as err:
        message = 'ElasticSearch Error from query: ' + str(query)
        raise ElasticException('CROPS', message, err)
def retrieve_crops_from_elastic(query):
    """Run *query* against the crops index.

    :returns: dict with 'total' (hit count reported by ES) and 'crops'
        (the raw hit objects, including metadata like _source/_score).
    :raises ElasticException: on any Elasticsearch error.
    """
    try:
        result = es.search(index=crop_index, doc_type=crop_mapping, body=query)
        # Plain copy — the identity comprehension [h for h in hits] was redundant.
        crops = list(result['hits']['hits'])
        return {'total': result['hits']['total'], 'crops': crops}
    except ElasticsearchException as e:
        raise ElasticException(
            'CROPS', 'ElasticSearch Error from query: ' + str(query), e)
def index_parcel(parcel):
    """Index a parcel document, keyed by its cadastral reference."""
    try:
        es.index(index=parcel_index,
                 doc_type=parcel_mapping,
                 body=parcel,
                 id=Parcel.get_cadastral_reference(parcel))
    except ElasticsearchException as err:
        raise ElasticException('CADASTRE', 'Error indexing parcel',
                               cause=err, actionable_info=parcel)
def get_parcels_by_bbox(min_lat, min_lon, max_lat, max_lon):
    """Return parcels intersecting a bounding box as a GeoJSON FeatureCollection.

    When ``query_cadastre_when_bbox`` is enabled, parcels are enriched with
    public cadastral info and Google elevation, and the ones that changed are
    re-stored.

    :raises ElasticException: on Elasticsearch errors.
    """
    try:
        query = {
            "query": {
                "bool": {
                    "must": {"match_all": {}},
                    "filter": {
                        "geo_shape": {
                            "bbox": {
                                "shape": {
                                    "type": "envelope",
                                    "coordinates": [[min_lon, min_lat],
                                                    [max_lon, max_lat]]
                                }
                            }
                        }
                    }
                }
            }
        }
        result = es.search(index=parcel_index, doc_type=parcel_mapping,
                           body=query, size=max_elastic_query_size)
        parcels = [hit['_source'] for hit in result['hits']['hits']]
        if query_cadastre_when_bbox:
            # The enrichment helpers return the references they modified;
            # only those parcels need to be written back.
            to_update = set(add_public_cadastral_info(parcels))
            to_update.update(add_elevation_from_google(parcels))
            updatable_parcels = [
                parcel for parcel in parcels
                if Parcel.get_cadastral_reference(parcel) in to_update]
            store_parcels(updatable_parcels)
        # Convert into geojson
        for parcel in parcels:
            parcel['type'] = 'Feature'
        return {'type': 'FeatureCollection', 'features': parcels}
    except ElasticsearchException as e:
        # str(e): Exception has no .message attribute in Python 3.
        raise ElasticException('PARCEL', str(e), e)
def wait_for_yellow_cluster_status(process_name):
    """Block until the cluster reports yellow or green status.

    Polls ``es.cluster.health`` until the status is not red, retrying up to
    ``settings.ELASTICSEARCH['retries']`` times and giving up once *timeout*
    has elapsed overall.

    :raises ElasticException: on persistent transport errors, repeated red
        status, or overall timeout.
    """
    logger.debug('Check cluster status, waiting for yellow or green status...')
    # One timer for the whole wait.  The original opened a fresh
    # timer.elapsed_timer() on every iteration, so elapsed() was always ~0,
    # the timeout guard was dead, and after expiry the loop would have spun
    # forever doing nothing.  (Assumes elapsed_timer yields a callable
    # returning elapsed seconds — TODO confirm against the timer helper.)
    with timer.elapsed_timer() as elapsed:
        for retry in itertools.count():
            if elapsed() >= timeout:
                raise ElasticException(
                    process_name, 'Error waiting for yellow status',
                    cause='Timed out after {}'.format(timeout))
            try:
                cluster_status = es.cluster.health(
                    wait_for_status='yellow', timeout=timeout)
                logger.debug('Cluster status: %s', cluster_status['status'])
                if cluster_status['status'] != 'red':
                    break
                raise Exception('Red status')
            except TransportError as e:
                if retry > settings.ELASTICSEARCH['retries']:
                    raise ElasticException(process_name,
                                           'Error connecting to elastic',
                                           cause=str(e))
            except Exception as e:
                if retry > settings.ELASTICSEARCH['retries']:
                    raise ElasticException(process_name,
                                           'Error waiting for yellow status',
                                           cause=str(e))
def get_closest_station(lat, lon):
    """Return the station nearest to (lat, lon), or {} when none is indexed.

    The result is the station's _source augmented with 'distance_to_parcel'
    in kilometers (great-circle distance).
    """
    geo_sort = {
        "_geo_distance": {
            "lat_lon": {"lat": lat, "lon": lon},
            "order": "asc",
            "unit": "km",
            "mode": "min",
            "distance_type": "sloppy_arc"
        }
    }
    try:
        response = es.search(index=es_index, doc_type=es_station_mapping,
                             body={"size": 1, "sort": [geo_sort]})
        hits = response['hits']['hits']
        if not hits:
            return {}
        station = hits[0]['_source']
        origin = (lat, lon)
        target = (station['lat_lon']['lat'], station['lat_lon']['lon'])
        station['distance_to_parcel'] = great_circle(origin, target).kilometers
        return station
    except ElasticsearchException as err:
        raise ElasticException(
            'CLIMATE', 'ElasticSearch Error getting closest station', err)
def get_closest_soil_measure(lat, lon):
    """Return the soil measure nearest to (lat, lon), or {} when none exists.

    The result is the measure's _source augmented with 'distance_to_parcel',
    taken from the distance ES computed for the geo sort (km).
    """
    geo_sort = {
        "_geo_distance": {
            "coordinates": {"lat": lat, "lon": lon},
            "order": "asc",
            "unit": "km",
            "mode": "min",
            "distance_type": "sloppy_arc"
        }
    }
    try:
        response = es.search(index=es_index, doc_type=es_soil_mapping,
                             body={"size": 1, "sort": [geo_sort]})
        hits = response['hits']['hits']
        if not hits:
            return {}
        measure = hits[0]['_source']
        # ES reports each hit's sort value — here the geo distance in km.
        measure['distance_to_parcel'] = hits[0]['sort'][0]
        return measure
    except ElasticsearchException as err:
        raise ElasticException(
            'SOIL', 'ElasticSearch Error getting closest soil measure', err)
def store_daily_document(document, lat_lon, altitud, index=es_index, mapping=es_daily_mapping):
    """Index one daily Inforiego measurement document.

    Attaches location/altitude, normalizes the time-of-day fields
    (zero-pad to 4 digits, '2400' -> '0000'), and builds the document id
    as FECHA_IDPROVINCIA_IDESTACION.

    :raises ElasticException: on any failure while normalizing or indexing.
    """
    document['lat_lon'] = lat_lon
    document['altitud'] = altitud
    # The five HORMIN* fields were normalized by five copy-pasted blocks;
    # a single loop keeps them in sync.
    time_fields = ('HORMINHUMMAX', 'HORMINHUMMIN', 'HORMINTEMPMAX',
                   'HORMINTEMPMIN', 'HORMINVELMAX')
    try:
        for field in time_fields:
            if document[field] is not None:
                document[field] = document[field].zfill(4).replace('2400',
                                                                   '0000')
        doc_id = (document[u'FECHA'].replace('/', '_') + '_' +
                  document[u'IDPROVINCIA'] + '_' +
                  document[u'IDESTACION'])
        util.wait_for_yellow_cluster_status('STORE_INFORIEGO_DAILY')
        es.index(index=index, doc_type=mapping, id=doc_id, body=document)
    except Exception as e:
        # Propagate the root cause (the original caught e but dropped it),
        # matching the cause= convention of the other helpers in this module.
        raise ElasticException('INFORIEGO', 'Error saving to Elastic',
                               actionable_info=str(document), cause=str(e))
def get_parcels_by_cadastral_code(cadastral_code, include_public_info=False, include_google_info=False):
    """Look up parcels by national cadastral reference.

    Falls back to the INSPIRE service when the index has no match; optionally
    enriches results with public cadastral info and Google elevation, and
    tags each parcel as a GeoJSON Feature.

    :returns: list of parcel dicts (GeoJSON Features).
    :raises ElasticException: on Elasticsearch errors.
    """
    logger.debug('get_parcels_by_cadastral_code(%s,%s)',
                 cadastral_code, include_public_info)
    try:
        query = {
            "query": {
                "match": {
                    "properties.nationalCadastralReference": cadastral_code
                }
            }
        }
        result = es.search(index=parcel_index, doc_type=parcel_mapping,
                           body=query)
        parcels = [hit['_source'] for hit in result['hits']['hits']]
        if not parcels:
            parcels = get_inspire_data_by_code(cadastral_code)
        if include_public_info:
            add_public_cadastral_info(parcels)
        # Convert into geojson
        for parcel in parcels:
            parcel['type'] = 'Feature'
        if include_google_info:
            add_elevation_from_google(parcels)
        return parcels
    except ElasticsearchException as e:
        # str(e): Exception has no .message attribute in Python 3.
        raise ElasticException('PARCEL', str(e), e)
def get_aggregated_climate_measures(station_id, province_id, num_years_back):
    """Aggregate daily climate measures for one station.

    Runs a single aggregation-only search (size 0) filtered by station and
    province, producing three aggregations over the daily index:
      - last_year: totals/extremes for the most recent year (rainfall,
        temperatures, sun hours, radiation, count of rainy days);
      - by_month:  monthly date-histogram per year, for the last
        *num_years_back* years;
      - by_day:    daily date-histogram per year, for the last
        *num_years_back* years.

    :param station_id: value matched against the IDESTACION term.
    :param province_id: value matched against the IDPROVINCIA term.
    :param num_years_back: how many most-recent years to include in the
        by_month / by_day aggregations.
    :returns: dict with keys 'by_month', 'by_day', 'last_year', each the
        output of the corresponding parse_* helper.
    :raises ElasticException: on any Elasticsearch error.
    """
    try:
        query = {
            "size": 0,  # aggregations only — no hit documents needed
            "query": {
                "constant_score": {
                    "filter": {
                        "bool": {
                            "must": [{
                                "term": {
                                    "IDESTACION": station_id
                                }
                            }, {
                                "term": {
                                    "IDPROVINCIA": province_id
                                }
                            }]
                        }
                    }
                }
            },
            "aggs": {
                # Most recent year only (terms on year, desc, size 1).
                "last_year": {
                    "terms": {
                        "field": u'AÑO',
                        "order": {
                            "_term": "desc"
                        },
                        "size": 1
                    },
                    "aggs": {
                        "sum_rainfall": {
                            "sum": {
                                "field": "PRECIPITACION"
                            }
                        },
                        "max_temperature": {
                            "max": {
                                "field": "TEMPMAX"
                            }
                        },
                        "min_temperature": {
                            "min": {
                                "field": "TEMPMIN"
                            }
                        },
                        "avg_temperature": {
                            "avg": {
                                "field": "TEMPMEDIA"
                            }
                        },
                        "avg_sun_hours": {
                            "avg": {
                                "field": "N"
                            }
                        },
                        "max_sun_hours": {
                            "max": {
                                "field": "N"
                            }
                        },
                        "sum_sun_hours": {
                            "sum": {
                                "field": "N"
                            }
                        },
                        "avg_radiation": {
                            "avg": {
                                "field": "RADIACION"
                            }
                        },
                        "max_radiation": {
                            "max": {
                                "field": "RADIACION"
                            }
                        },
                        "sum_radiation": {
                            "sum": {
                                "field": "RADIACION"
                            }
                        },
                        # Count of days with any rainfall at all.
                        "rainy_days": {
                            "filter": {
                                "range": {
                                    "PRECIPITACION": {
                                        "gt": 0
                                    }
                                }
                            }
                        }
                    }
                },
                # Per-year buckets, each holding a monthly histogram.
                "by_month": {
                    "terms": {
                        "field": u'AÑO',
                        "order": {
                            "_term": "desc"
                        },
                        "size": num_years_back
                    },
                    "aggs": {
                        "measure": {
                            "date_histogram": {
                                "field": "FECHA",
                                "interval": "month",
                                "format": "M"
                            },
                            "aggs": {
                                "rainfall": {
                                    "sum": {
                                        "field": "PRECIPITACION"
                                    }
                                },
                                "avg_temperature": {
                                    "avg": {
                                        "field": "TEMPMEDIA"
                                    }
                                },
                                "sun_hours": {
                                    "sum": {
                                        "field": "N"
                                    }
                                },
                                "radiation": {
                                    "sum": {
                                        "field": "RADIACION"
                                    }
                                }
                            }
                        }
                    }
                },
                # Per-year buckets, each holding a daily histogram.
                "by_day": {
                    "terms": {
                        "field": u'AÑO',
                        "order": {
                            "_term": "desc"
                        },
                        "size": num_years_back
                    },
                    "aggs": {
                        "measure": {
                            "date_histogram": {
                                "field": "FECHA",
                                "interval": "day",
                                "format": "dd-MM-yyyy"
                            },
                            "aggs": {
                                "avg_temperature": {
                                    "avg": {
                                        "field": "TEMPMEDIA"
                                    }
                                },
                                "sun_hours": {
                                    "sum": {
                                        "field": "N"
                                    }
                                },
                                "radiation": {
                                    "sum": {
                                        "field": "RADIACION"
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        result = es.search(index=es_index, doc_type=es_daily_mapping,
                           body=query)
        hit = result['aggregations']
        by_month = parse_by_month(hit['by_month']['buckets'])
        by_day = parse_by_day(hit['by_day']['buckets'])
        last_year = parse_last_year(hit['last_year']['buckets'])
        return {'by_month': by_month, 'by_day': by_day,
                'last_year': last_year}
    except ElasticsearchException as e:
        raise ElasticException('CLIMATE',
                               'ElasticSearch error getting climate aggrs', e)
def get_bucket_of_parcels_by_bbox_and_precision(
        min_lat, min_lon, max_lat, max_lon, precision):
    """Bucket parcels inside a bounding box into a geohash grid.

    :param precision: geohash_grid precision for the aggregation.
    :returns: GeoJSON FeatureCollection with one Point Feature per geohash
        bucket (parcel count and summed area), plus collection-level
        total/num_buckets/min/max properties.
    :raises ElasticException: on Elasticsearch errors.
    """
    try:
        query = {
            "size": 0,
            "query": {
                "bool": {
                    "must": {"match_all": {}},
                    "filter": {
                        "geo_shape": {
                            "bbox": {
                                "shape": {
                                    "type": "envelope",
                                    "coordinates": [[min_lon, min_lat],
                                                    [max_lon, max_lat]]
                                }
                            }
                        }
                    }
                }
            },
            "aggs": {
                "2": {
                    "geohash_grid": {
                        "field": "properties.reference_point",
                        "precision": precision
                    },
                    "aggs": {
                        "area": {
                            "sum": {"field": "properties.areaValue"}
                        }
                    }
                }
            }
        }
        result = es.search(index=parcel_index, doc_type=parcel_mapping,
                           body=query, request_timeout=30)
        buckets = result['aggregations']['2']['buckets']
        features = []
        for bucket in buckets:
            # Bucket key is a geohash; decode to the cell's center point.
            (lat, lng, lat_err, lng_err) = geohash.decode_exactly(
                bucket['key'])
            features.append(
                {"geometry": {
                    "type": "Point",
                    "coordinates": [float(lng), float(lat)]},
                 "properties": {
                    "value": bucket['doc_count'],
                    "area": bucket['area']['value']},
                 "type": "Feature"})
        counts = [bucket['doc_count'] for bucket in buckets]
        # Fix: min was initialized to 0 and doc_count is always >= 1, so the
        # reported min_value was always 0.  Also stop shadowing the min/max
        # builtins.
        max_count = max(counts) if counts else 0
        min_count = min(counts) if counts else 0
        for feature in features:
            feature['properties']['num_buckets'] = len(features)
            feature['properties']['min_value'] = min_count
            feature['properties']['max_value'] = max_count
        return {'type': 'FeatureCollection',
                'features': features,
                'properties': {
                    'total': result['hits']['total'],
                    'num_buckets': len(features),
                    'max': max_count,
                    'min': min_count
                }}
    except ElasticsearchException as e:
        # str(e): Exception has no .message attribute in Python 3.
        raise ElasticException('PARCEL', str(e), e)