Beispiel #1
0
def elastic_bulk_save(process_name,
                      index,
                      doc_type,
                      records,
                      ids=None,
                      parents=None,
                      retry=True):
    """Bulk-index ``records``, falling back to one request per record.

    The whole batch is first sent through ``es_helpers.bulk``. If that
    fails and ``retry`` is true, every record is saved again on its own
    (with ``retry=False``), keeping its id and parent. If a save fails with
    ``retry=False`` an ``ElasticException`` is raised for the first record
    of the batch.

    :param process_name: name forwarded to the cluster-status check and to
        ``ElasticException``.
    :param index: value used as ``_index`` of every action.
    :param doc_type: value used as ``_type`` of every action.
    :param records: sized sequence of document bodies (``_source``).
    :param ids: optional ids, parallel to ``records``; ``None`` entries are
        used when omitted.
    :param parents: optional parent ids, parallel to ``records``; ``None``
        entries are used when omitted.
    :param retry: whether a failed batch is retried record by record.
    :raises ElasticException: when saving fails and ``retry`` is false.
    """
    # Normalise the optional lists before the try block so that the error
    # handler below can always iterate over them.
    if ids is None:
        ids = [None] * len(records)

    if parents is None:
        parents = [None] * len(records)

    try:
        wait_for_yellow_cluster_status(process_name)
        es_helpers.bulk(es, ({
            '_index': index,
            '_id': idx,
            '_parent': parent,
            '_type': doc_type,
            '_source': r
        } for (r, idx, parent) in zip(records, ids, parents)))
    except Exception as e:
        for (r, idx, parent) in zip(records, ids, parents):
            if retry:
                # Forward the parent as well: without it the retried
                # document would be indexed with ``_parent`` set to None.
                elastic_bulk_save(process_name,
                                  index,
                                  doc_type, [r], [idx], [parent],
                                  retry=False)
            else:
                raise ElasticException(process_name,
                                       'Error saving to Elastic',
                                       actionable_info=str(r),
                                       cause=str(e))
Beispiel #2
0
def elastic_update(process_name, index, doc_type, record, id):
    """Send an update request for one document.

    Waits for the cluster status check to pass, then calls ``es.update``
    with ``record`` as the request body.

    :raises ElasticException: on any failure; it carries the stringified
        record and the stringified original error.
    """
    try:
        wait_for_yellow_cluster_status(process_name)
        es.update(body=record, id=id, doc_type=doc_type, index=index)
    except Exception as error:
        message = 'Error updating {} in Elastic'.format(id)
        details = str(record)
        reason = str(error)
        raise ElasticException(process_name,
                               message,
                               actionable_info=details,
                               cause=reason)
Beispiel #3
0
def index_crops_in_elastic(crop_id, query):
    """Index ``query`` as the document ``crop_id`` in the crop index.

    :raises ElasticException: when the client raises an
        ``ElasticsearchException``; the message embeds ``str(query)``.
    """
    try:
        es.index(body=query,
                 id=crop_id,
                 doc_type=crop_mapping,
                 index=crop_index)
    except ElasticsearchException as error:
        message = 'ElasticSearch Error from query: ' + str(query)
        raise ElasticException('CROPS', message, error)
Beispiel #4
0
def retrieve_crops_from_elastic(query):
    """Run ``query`` against the crop index.

    :returns: ``{'total': <hits total>, 'crops': <list of raw hits>}``.
    :raises ElasticException: when the client raises an
        ``ElasticsearchException``; the message embeds ``str(query)``.
    """
    try:
        response = es.search(body=query,
                             doc_type=crop_mapping,
                             index=crop_index)

        # Copy the raw hits (metadata included) into a fresh list.
        crops = list(response['hits']['hits'])
        total = response['hits']['total']

        return {'total': total, 'crops': crops}
    except ElasticsearchException as error:
        message = 'ElasticSearch Error from query: ' + str(query)
        raise ElasticException('CROPS', message, error)
Beispiel #5
0
def index_parcel(parcel):
    """Index ``parcel`` using its cadastral reference as the document id.

    :raises ElasticException: when the client raises an
        ``ElasticsearchException``; the parcel is attached as actionable
        info and the original error as cause.
    """
    try:
        reference = Parcel.get_cadastral_reference(parcel)
        es.index(id=reference,
                 body=parcel,
                 doc_type=parcel_mapping,
                 index=parcel_index)
    except ElasticsearchException as error:
        raise ElasticException('CADASTRE',
                               'Error indexing parcel',
                               actionable_info=parcel,
                               cause=error)
Beispiel #6
0
def get_parcels_by_bbox(min_lat, min_lon, max_lat, max_lon):
    """Return the parcels matched by a bounding-box filter as GeoJSON.

    Searches the parcel index with a ``geo_shape`` envelope filter on the
    ``bbox`` field, limited to ``max_elastic_query_size`` hits. When
    ``query_cadastre_when_bbox`` is truthy the parcels are enriched through
    ``add_public_cadastral_info`` and ``add_elevation_from_google``, and
    the parcels whose cadastral reference was reported by either helper
    are passed to ``store_parcels``.

    :returns: ``{'type': 'FeatureCollection', 'features': [...]}`` where
        each feature is a hit ``_source`` with ``'type': 'Feature'`` added.
    :raises ElasticException: when the client raises an
        ``ElasticsearchException``.
    """
    try:
        # NOTE(review): the Elasticsearch docs describe envelope
        # coordinates as [[min_lon, max_lat], [max_lon, min_lat]]
        # (upper-left, lower-right); this sends lower-left/upper-right.
        # Confirm the targeted server version accepts that order.
        query = {
            "query": {
                "bool": {
                    "must": {
                        "match_all": {}
                    },
                    "filter": {
                        "geo_shape": {
                            "bbox": {
                                "shape": {
                                    "type": "envelope",
                                    "coordinates": [
                                        [min_lon, min_lat],
                                        [max_lon, max_lat]]
                                }
                            }
                        }
                    }
                }
            }
        }

        result = es.search(index=parcel_index,
                           doc_type=parcel_mapping,
                           body=query,
                           size=max_elastic_query_size)

        parcels = [hit['_source'] for hit in result['hits']['hits']]

        if query_cadastre_when_bbox:
            # Both helpers are expected to return the cadastral references
            # of the parcels they modified (that is how the values are
            # used below) -- TODO confirm against their definitions.
            to_update = set(add_public_cadastral_info(parcels))
            to_update.update(add_elevation_from_google(parcels))

            updatable_parcels = [parcel for parcel in parcels
                                 if Parcel.get_cadastral_reference(parcel)
                                 in to_update]
            store_parcels(updatable_parcels)

        # Convert into geojson
        for parcel in parcels:
            parcel['type'] = 'Feature'

        return {'type': 'FeatureCollection',
                'features': parcels}
    except ElasticsearchException as e:
        # Exceptions have no ``message`` attribute on Python 3; fall back
        # to str(e) so the handler does not fail with AttributeError.
        message = getattr(e, 'message', None) or str(e)
        raise ElasticException('PARCEL', message, e)
Beispiel #7
0
def wait_for_yellow_cluster_status(process_name):
    """Poll the cluster health until its status is not ``red``.

    Each attempt calls ``es.cluster.health(wait_for_status='yellow')`` and
    returns as soon as the reported status differs from ``'red'``. Failed
    attempts are silently retried until the attempt counter exceeds
    ``settings.ELASTICSEARCH['retries']``.

    :param process_name: name forwarded to ``ElasticException``.
    :raises ElasticException: once the retry budget is exhausted, either
        for a ``TransportError`` ('Error connecting to elastic') or for any
        other error, including a red status ('Error waiting for yellow
        status').
    """
    logger.debug('Check cluster status, waiting for yellow or green status...')
    # ``retry`` counts attempts from 0 with no upper bound of its own; the
    # loop only ends through ``break`` or a raised ElasticException.
    for retry in itertools.count():
        # NOTE(review): the timer context is entered anew on every
        # iteration, so ``elapsed()`` presumably restarts near zero each
        # time and the check below would not enforce an overall deadline --
        # confirm against timer.elapsed_timer. If the check is ever false,
        # the iteration does nothing and the loop continues immediately.
        with timer.elapsed_timer() as elapsed:
            if elapsed() < timeout:
                try:
                    cluster_status = es.cluster.health(
                        wait_for_status='yellow', timeout=timeout)
                    logger.debug('Cluster status: %s',
                                 cluster_status['status'])
                    if cluster_status['status'] != 'red':
                        break
                    # Caught by the generic handler below, so a red status
                    # is retried like any other failure.
                    raise Exception('Red status')
                except TransportError as e:
                    # Swallowed while retries remain; no delay is added
                    # between attempts here.
                    if retry > settings.ELASTICSEARCH['retries']:
                        raise ElasticException(process_name,
                                               'Error connecting to elastic',
                                               cause=str(e))
                except Exception as e:
                    if retry > settings.ELASTICSEARCH['retries']:
                        raise ElasticException(
                            process_name,
                            'Error waiting for yellow status',
                            cause=str(e))
Beispiel #8
0
def get_closest_station(lat, lon):
    """Look up the station nearest to ``(lat, lon)``.

    Requests a single hit sorted by ascending geo distance on the
    ``lat_lon`` field.

    :returns: the station ``_source`` with ``distance_to_parcel`` added
        (great-circle distance in kilometres), or ``{}`` unless exactly one
        station was returned.
    :raises ElasticException: when the client raises an
        ``ElasticsearchException``.
    """
    try:
        nearest_first = {
            "_geo_distance": {
                "lat_lon": {
                    "lat": lat,
                    "lon": lon
                },
                "order": "asc",
                "unit": "km",
                "mode": "min",
                "distance_type": "sloppy_arc"
            }
        }
        query = {"size": 1, "sort": [nearest_first]}

        response = es.search(body=query,
                             doc_type=es_station_mapping,
                             index=es_index)
        stations = [hit['_source'] for hit in response['hits']['hits']]

        if len(stations) != 1:
            return {}

        station = stations[0]
        position = station['lat_lon']
        station_loc = (position['lat'], position['lon'])

        # Distance is recomputed client-side instead of reading the sort
        # value from the hit.
        distance = great_circle((lat, lon), station_loc)
        station['distance_to_parcel'] = distance.kilometers

        return station

    except ElasticsearchException as error:
        raise ElasticException('CLIMATE',
                               'ElasticSearch Error getting closest station',
                               error)
Beispiel #9
0
def get_closest_soil_measure(lat, lon):
    """Look up the soil measure nearest to ``(lat, lon)``.

    Requests a single hit sorted by ascending geo distance on the
    ``coordinates`` field.

    :returns: the measure ``_source`` with ``distance_to_parcel`` set to
        the first sort value of the hit, or ``{}`` unless exactly one
        measure was returned.
    :raises ElasticException: when the client raises an
        ``ElasticsearchException``.
    """
    try:
        nearest_first = {
            "_geo_distance": {
                "coordinates": {
                    "lat": lat,
                    "lon": lon
                },
                "order": "asc",
                "unit": "km",
                "mode": "min",
                "distance_type": "sloppy_arc"
            }
        }
        query = {"size": 1, "sort": [nearest_first]}

        response = es.search(body=query,
                             doc_type=es_soil_mapping,
                             index=es_index)

        found = response['hits']['hits']
        measures = [hit['_source'] for hit in found]
        sort_values = [hit['sort'] for hit in found]

        if len(measures) != 1:
            return {}

        measure = measures[0]
        # The sort value of a _geo_distance sort is the computed distance.
        measure['distance_to_parcel'] = sort_values[0][0]
        return measure

    except ElasticsearchException as error:
        raise ElasticException(
            'SOIL', 'ElasticSearch Error getting closest soil measure', error)
Beispiel #10
0
def store_daily_document(document,
                         lat_lon,
                         altitud,
                         index=es_index,
                         mapping=es_daily_mapping):
    """Normalise a daily measure document and index it.

    ``document`` is modified in place: ``lat_lon`` and ``altitud`` are
    stored on it, and every non-None ``HORMIN*`` field is zero-padded to
    four characters with ``'2400'`` rewritten to ``'0000'``. The document
    id is built as ``<FECHA with '/' replaced by '_'>_<IDPROVINCIA>_
    <IDESTACION>``.

    :param document: dict holding the measure; must contain the
        ``HORMIN*``, ``FECHA``, ``IDPROVINCIA`` and ``IDESTACION`` keys.
    :param lat_lon: value stored under ``document['lat_lon']``.
    :param altitud: value stored under ``document['altitud']``.
    :param index: target index.
    :param mapping: target document type.
    :raises ElasticException: on any failure while normalising or
        indexing; carries the stringified document and original error.
    """
    document['lat_lon'] = lat_lon
    document['altitud'] = altitud
    try:
        # Same normalisation for every hour-minute field.
        for field in ('HORMINHUMMAX', 'HORMINHUMMIN', 'HORMINTEMPMAX',
                      'HORMINTEMPMIN', 'HORMINVELMAX'):
            if document[field] is not None:
                document[field] = document[field].zfill(
                    4).replace('2400', '0000')

        doc_id = document[u'FECHA'].replace('/', '_') + '_' + \
            document[u'IDPROVINCIA'] + '_' + \
            document[u'IDESTACION']

        util.wait_for_yellow_cluster_status('STORE_INFORIEGO_DAILY')
        es.index(index=index, doc_type=mapping, id=doc_id, body=document)
    except Exception as e:
        # Keep the original error instead of discarding it, as the other
        # Elastic helpers do.
        raise ElasticException('INFORIEGO',
                               'Error saving to Elastic',
                               actionable_info=str(document),
                               cause=str(e))
Beispiel #11
0
def get_parcels_by_cadastral_code(cadastral_code,
                                  include_public_info=False,
                                  include_google_info=False):
    """Return the parcels matching a national cadastral reference.

    Searches the parcel index with a ``match`` query on
    ``properties.nationalCadastralReference``. When nothing is found the
    parcels are fetched through ``get_inspire_data_by_code`` instead.

    :param cadastral_code: cadastral reference to look up.
    :param include_public_info: when true, the parcels are passed to
        ``add_public_cadastral_info``.
    :param include_google_info: when true, the parcels are passed to
        ``add_elevation_from_google``.
    :returns: list of parcels, each with ``'type': 'Feature'`` set.
    :raises ElasticException: when the client raises an
        ``ElasticsearchException``.
    """
    logger.debug('get_parcels_by_cadastral_code(%s,%s)',
                 cadastral_code,
                 include_public_info)
    try:
        query = {
            "query": {
                "match": {
                    "properties.nationalCadastralReference": cadastral_code
                }
            }
        }

        result = es.search(index=parcel_index,
                           doc_type=parcel_mapping,
                           body=query)

        parcels = [hit['_source'] for hit in result['hits']['hits']]

        if not parcels:
            parcels = get_inspire_data_by_code(cadastral_code)

        if include_public_info:
            add_public_cadastral_info(parcels)

        # Convert into geojson
        for parcel in parcels:
            parcel['type'] = 'Feature'
        if include_google_info:
            add_elevation_from_google(parcels)

        return parcels
    except ElasticsearchException as e:
        # Exceptions have no ``message`` attribute on Python 3; fall back
        # to str(e) so the handler does not fail with AttributeError.
        message = getattr(e, 'message', None) or str(e)
        raise ElasticException('PARCEL', message, e)
Beispiel #12
0
def get_aggregated_climate_measures(station_id, province_id, num_years_back):
    """Fetch aggregated climate figures for one station.

    Runs a single aggregation-only search (``size`` 0) filtered on
    ``IDESTACION`` and ``IDPROVINCIA`` with three top-level aggregations,
    all bucketed by the ``AÑO`` field in descending term order:

    * ``last_year``: one bucket with rainfall, temperature, sun-hour and
      radiation metrics plus a ``rainy_days`` filter;
    * ``by_month``: ``num_years_back`` buckets, each with a monthly date
      histogram;
    * ``by_day``: ``num_years_back`` buckets, each with a daily date
      histogram.

    :returns: ``{'by_month': ..., 'by_day': ..., 'last_year': ...}`` as
        produced by ``parse_by_month``, ``parse_by_day`` and
        ``parse_last_year``.
    :raises ElasticException: when the client raises an
        ``ElasticsearchException``.
    """

    def metric(kind, field):
        # Single-value metric clause, e.g. {"sum": {"field": "N"}}.
        return {kind: {"field": field}}

    def newest_years(how_many):
        # Terms clause selecting the ``how_many`` highest year terms.
        return {
            "field": u'AÑO',
            "order": {
                "_term": "desc"
            },
            "size": how_many
        }

    def histogram(interval, key_format, metrics):
        # Per-year sub-aggregation: a date histogram on FECHA with metrics.
        return {
            "measure": {
                "date_histogram": {
                    "field": "FECHA",
                    "interval": interval,
                    "format": key_format
                },
                "aggs": metrics
            }
        }

    try:
        station_filter = {
            "bool": {
                "must": [
                    {"term": {"IDESTACION": station_id}},
                    {"term": {"IDPROVINCIA": province_id}},
                ]
            }
        }

        last_year_metrics = {
            "sum_rainfall": metric("sum", "PRECIPITACION"),
            "max_temperature": metric("max", "TEMPMAX"),
            "min_temperature": metric("min", "TEMPMIN"),
            "avg_temperature": metric("avg", "TEMPMEDIA"),
            "avg_sun_hours": metric("avg", "N"),
            "max_sun_hours": metric("max", "N"),
            "sum_sun_hours": metric("sum", "N"),
            "avg_radiation": metric("avg", "RADIACION"),
            "max_radiation": metric("max", "RADIACION"),
            "sum_radiation": metric("sum", "RADIACION"),
            "rainy_days": {
                "filter": {
                    "range": {
                        "PRECIPITACION": {
                            "gt": 0
                        }
                    }
                }
            },
        }

        monthly_metrics = {
            "rainfall": metric("sum", "PRECIPITACION"),
            "avg_temperature": metric("avg", "TEMPMEDIA"),
            "sun_hours": metric("sum", "N"),
            "radiation": metric("sum", "RADIACION"),
        }

        daily_metrics = {
            "avg_temperature": metric("avg", "TEMPMEDIA"),
            "sun_hours": metric("sum", "N"),
            "radiation": metric("sum", "RADIACION"),
        }

        query = {
            "size": 0,
            "query": {
                "constant_score": {
                    "filter": station_filter
                }
            },
            "aggs": {
                "last_year": {
                    "terms": newest_years(1),
                    "aggs": last_year_metrics
                },
                "by_month": {
                    "terms": newest_years(num_years_back),
                    "aggs": histogram("month", "M", monthly_metrics)
                },
                "by_day": {
                    "terms": newest_years(num_years_back),
                    "aggs": histogram("day", "dd-MM-yyyy", daily_metrics)
                }
            }
        }

        response = es.search(body=query,
                             doc_type=es_daily_mapping,
                             index=es_index)
        aggregations = response['aggregations']

        monthly = parse_by_month(aggregations['by_month']['buckets'])
        daily = parse_by_day(aggregations['by_day']['buckets'])
        latest = parse_last_year(aggregations['last_year']['buckets'])

        return {'by_month': monthly, 'by_day': daily, 'last_year': latest}
    except ElasticsearchException as error:
        raise ElasticException('CLIMATE',
                               'ElasticSearch error getting climate aggrs',
                               error)
Beispiel #13
0
def get_bucket_of_parcels_by_bbox_and_precision(
        min_lat, min_lon, max_lat, max_lon, precision):
    """Return geohash-grid buckets of the parcels in a bounding box.

    Runs an aggregation-only search (``size`` 0) with a ``geo_shape``
    envelope filter on ``bbox`` and a ``geohash_grid`` aggregation on
    ``properties.reference_point`` that also sums
    ``properties.areaValue`` per bucket.

    :param precision: geohash precision passed to the aggregation.
    :returns: a GeoJSON ``FeatureCollection``. Each feature is a point at
        the decoded geohash with ``value`` (document count), ``area``,
        ``num_buckets``, ``min_value`` and ``max_value`` properties; the
        collection-level ``properties`` hold ``total``, ``num_buckets``,
        ``max`` and ``min``. Both extremes are 0 when there are no buckets.
    :raises ElasticException: when the client raises an
        ``ElasticsearchException``.
    """
    try:
        # NOTE(review): the Elasticsearch docs describe envelope
        # coordinates as [[min_lon, max_lat], [max_lon, min_lat]]
        # (upper-left, lower-right); this sends lower-left/upper-right.
        # Confirm the targeted server version accepts that order.
        query = {
            "size": 0,
            "query": {
                "bool": {
                    "must": {
                        "match_all": {}
                    },
                    "filter": {
                        "geo_shape": {
                            "bbox": {
                                "shape": {
                                    "type": "envelope",
                                    "coordinates": [
                                        [min_lon, min_lat],
                                        [max_lon, max_lat]]
                                }
                            }
                        }
                    }
                }
            },
            "aggs": {
                "2": {
                    "geohash_grid": {
                        "field": "properties.reference_point",
                        "precision": precision
                    },
                    "aggs": {
                        "area": {
                            "sum": {
                                "field": "properties.areaValue"
                            }
                        }
                    }
                }
            }
        }

        result = es.search(
            index=parcel_index,
            doc_type=parcel_mapping,
            body=query,
            request_timeout=30)

        buckets = result['aggregations']['2']['buckets']

        parcels_buckets = []
        for bucket in buckets:
            # decode_exactly also returns the error margins; unused here.
            lat, lng, _lat_err, _lng_err = geohash.decode_exactly(
                bucket['key'])
            parcels_buckets.append(
                {"geometry": {
                    "type": "Point",
                    "coordinates": [float(lng), float(lat)]},
                    "properties": {
                        "value": bucket['doc_count'],
                        "area": bucket['area']['value']},
                    "type": "Feature"})

        # The minimum used to be initialised to 0 and could therefore never
        # be lowered by a real document count; derive both extremes from
        # the data and only fall back to 0 when there are no buckets.
        doc_counts = [bucket['doc_count'] for bucket in buckets]
        max_value = max(doc_counts) if doc_counts else 0
        min_value = min(doc_counts) if doc_counts else 0
        num_buckets = len(parcels_buckets)

        for parcel in parcels_buckets:
            parcel['properties']['num_buckets'] = num_buckets
            parcel['properties']['min_value'] = min_value
            parcel['properties']['max_value'] = max_value

        parcels_geojson = {'type': 'FeatureCollection',
                           'features': parcels_buckets,
                           'properties': {
                               'total': result['hits']['total'],
                               'num_buckets': num_buckets,
                               'max': max_value,
                               'min': min_value
                               }
                           }

        return parcels_geojson
    except ElasticsearchException as e:
        # Exceptions have no ``message`` attribute on Python 3; fall back
        # to str(e) so the handler does not fail with AttributeError.
        message = getattr(e, 'message', None) or str(e)
        raise ElasticException('PARCEL', message, e)