예제 #1
0
class esdata(object):
    """Small helper that bulk-indexes (id, name, age) rows into Elasticsearch.

    Example value for the constructor argument: ['127.0.0.1:9200'].
    """

    # Sample search body: at most 100 documents whose "name" matches 'a'.
    # No active code in this class reads it; see the note in saveData.
    query = {
        "query": {"bool": {"must": [{"match": {"name": 'a'}}]}},
        "size": 100,
    }

    def __init__(self, ES):
        """Create the Elasticsearch client.

        Args:
            ES: Node address(es) handed unchanged to ``Elasticsearch``.
        """
        self._add = ES
        client_options = dict(
            # Sniff the cluster nodes before the first request.
            sniff_on_start=True,
            # Refresh the node list when a connection to a node fails.
            sniff_on_connection_fail=True,
            # Refresh the node list every 60 seconds.
            sniffer_timeout=60,
        )
        self.es = Elasticsearch(self._add, **client_options)

    def saveData(self, list):
        """Bulk-index ``list`` into index "index1" and print the response.

        Args:
            list: Iterable of rows, converted by ``createdoc``.
        """
        payload = self.createdoc(list)
        response = self.es.bulk(index="index1", doc_type='type1', body=payload)
        print('insert es successfull ?', response)

        # To read the data back, the class-level query can be used:
        #   self.es.search(index="index1", doc_type='type1', body=self.query)

    def createdoc(self, list):
        """Convert rows into an alternating action/document bulk body.

        Args:
            list: Iterable of rows; the values of each row are paired
                positionally with the keys 'id', 'name' and 'age'.

        Returns:
            A list holding, for every row, an ``{'index': {}}`` action
            entry followed by the document dict built from that row.
        """
        field_names = ['id', 'name', 'age']
        doc = []
        for row in list:
            # A fresh action dict per row, immediately followed by its document.
            doc.extend(({'index': {}}, dict(zip(field_names, row))))

        print('doc is:', doc)
        return doc
예제 #2
0
class ElasticsearchBackend(BaseMetricsBackend):
    """Metrics backend that stores every metric as an Elasticsearch document.

    Documents go to an index whose name is rendered from ``index_pattern``
    (by default the base index name plus the current date).
    """

    def __init__(self,
                 hosts=None,
                 index="metrics",
                 doc_type="metric",
                 index_pattern="{index}-{date:%Y.%m.%d}",
                 *args,
                 **kwargs):
        """Create the client, then try to set up the index and the template.

        Args:
            hosts: Passed to ``Elasticsearch`` as ``hosts``.
            index: Base index name, used in the index pattern and template.
            doc_type: Document type used in the mapping and when indexing.
            index_pattern: ``str.format`` pattern receiving ``index`` and
                ``date`` (the current ``datetime``).
            *args: Extra positional arguments for ``Elasticsearch``.
            **kwargs: Extra keyword arguments for ``Elasticsearch``.
        """
        # Kept on the instance because they are needed whenever metrics
        # are written.
        self.index = index
        self.doc_type = doc_type
        self.index_pattern = index_pattern

        self.client = Elasticsearch(hosts=hosts, *args, **kwargs)

        # Both setup steps are best effort: a transport failure is logged
        # and construction carries on.
        setup_steps = (
            (self._setup_index, 'index setup error %r'),
            (self._setup_mapping, 'mapping setup error %r'),
        )
        for step, message in setup_steps:
            try:
                step()
            except TransportError as exc:
                logger.error(message, exc)

    def get_index(self):
        """Return the index name rendered for the current local time."""
        now = datetime.now()
        return self.index_pattern.format(index=self.index, date=now)

    def _setup_index(self):
        """Create the current index.

        ``ignore=400`` is passed to the client, presumably so that an
        "already exists" response is not raised -- confirm against the
        client documentation.
        """
        index_name = self.get_index()
        return self.client.indices.create(index_name, ignore=400)

    def _setup_mapping(self):
        """Install the index template that describes the metric documents."""
        # Dynamic string fields are mapped as keywords.
        keyword = {"type": "keyword"}
        string_template = {
            "strings": {
                "mapping": {"type": "keyword"},
                "match_mapping_type": "string",
            }
        }
        properties = {
            "name": dict(keyword),
            "timestamp": {"type": "date"},
            "hostname": dict(keyword),
            "value": {"type": "float"},
            "origin": dict(keyword),
        }
        type_mapping = {
            "dynamic_templates": [string_template],
            "_source": {"enabled": True},
            "properties": properties,
        }
        template_body = {
            "template": "{}*".format(self.index),
            "mappings": {self.doc_type: type_mapping},
            "settings": {
                "number_of_shards": "1",
                "number_of_replicas": "1",
            },
        }
        template_name = "timeexecution-{}".format(self.index)
        return self.client.indices.put_template(name=template_name,
                                                body=template_body)

    def write(self, name, **data):
        """Index a single metric document.

        Args:
            name (str): Metric name, stored under the "name" key.
            **data: Further fields of the document. When no "timestamp"
                is supplied, the current UTC time is used.

        A ``TransportError`` raised by the client is logged as a warning
        and not propagated.
        """
        data["name"] = name
        if "timestamp" not in data:
            data["timestamp"] = datetime.utcnow()

        request = dict(index=self.get_index(),
                       doc_type=self.doc_type,
                       id=None,
                       body=data)
        try:
            self.client.index(**request)
        except TransportError as exc:
            logger.warning('writing metric %r failure %r', data, exc)

    def bulk_write(self, metrics):
        """Index several metric documents with one bulk request.

        Args:
            metrics (list): Documents to send; each one is preceded by an
                "index" action naming the current index and document type.

        A ``TransportError`` raised by the client is logged as a warning
        and not propagated.
        """
        target_index = self.get_index()
        actions = []
        for metric in metrics:
            header = {'index': {'_index': target_index,
                                '_type': self.doc_type}}
            actions.extend((header, metric))

        try:
            self.client.bulk(actions)
        except TransportError as exc:
            logger.warning('bulk_write metrics %r failure %r', metrics, exc)
예제 #3
0
class GeoElasticSearch:
    """Place search and geo aggregations on top of one Elasticsearch index.

    The instance keeps the index name, the name of the ``geo_point`` field
    and an ``Elasticsearch`` client; every method issues its requests
    against that index.
    """

    GEO_POINTS = 'geopoints'

    def __init__(self, host, port, index_param, field):
        """Connect to Elasticsearch and create the index when it is missing.

        Args:
            host: Host name or address of the Elasticsearch node.
            port: Port of the node (converted with ``str``).
            index_param: Name of the index used by every request.
            field: Name of the field mapped as ``geo_point``; also used by
                the geo aggregations.
        """
        self.field = field
        self.index = index_param

        self.es = Elasticsearch([host + ':' + str(port)])

        if not self.es.indices.exists(index=index_param):
            self.es.indices.create(index=index_param, body={
                "mappings": {
                    "_doc": {
                        "properties": {
                            field: {
                                "type": "geo_point"
                            }
                        }
                    }
                }
            })

    def search(self, place):
        """Search the address fields for ``place``.

        Args:
            place: Free-text query matched against the weighted
                ``AddressInfo`` fields.

        Returns:
            ``{"results": [...]}`` with one entry per hit whose
            "location" is a non-empty "lat,lon" string. Each entry holds
            "country", "title", "latitude" and "longitude", plus
            "stateOrProvince", "town" and "addressLine1" when the hit
            provides them.
        """
        es_result = self.es.search(
            index=self.index,
            body={
                "query": {
                    "multi_match": {
                        "query": place,
                        "fields": [
                            "AddressInfo.Country.Title^4",
                            "AddressInfo.StateOrProvince^3",
                            "AddressInfo.Town^2",
                            "AddressInfo.AddressLine1^1",
                            "AddressInfo.Title^1"
                        ]
                    }
                },
                "_source": [
                    "AddressInfo.Country.Title",
                    "AddressInfo.StateOrProvince",
                    "AddressInfo.Town",
                    "AddressInfo.AddressLine1",
                    "AddressInfo.Title",
                    "location"
                ],
                "size": 30
            }
        )

        # Optional output key -> key inside the hit's AddressInfo object.
        optional_fields = (
            ("stateOrProvince", 'StateOrProvince'),
            ("town", 'Town'),
            ("addressLine1", 'AddressLine1'),
        )

        results = []

        # The hit list is simply empty when nothing matched, so it is
        # iterated directly instead of reading hits.total first (whose
        # shape differs between Elasticsearch versions).
        for es_hit in es_result['hits']['hits']:
            es_source = es_hit['_source']
            location = es_source['location']

            # Only "lat,lon" string locations can be split into coordinates.
            if location == '' or ',' not in location:
                continue

            coordinates = location.split(',')
            address_info = es_source['AddressInfo']

            result_entry = {
                "country": address_info['Country']['Title'],
                "title": address_info['Title'],
                "latitude": float(coordinates[0]),
                "longitude": float(coordinates[1])
            }

            for target_key, source_key in optional_fields:
                if source_key in address_info:
                    result_entry[target_key] = address_info[source_key]

            results.append(result_entry)

        return {"results": results}

    def upload_points(self, geo_points):
        """Bulk-index the given points, using 1-based positions as ids.

        Args:
            geo_points: Object whose ``points`` sequence holds items with
                ``latitude`` and ``longitude`` attributes.
        """
        lines = []
        for doc_id, point in enumerate(geo_points.points, 1):
            # One action line followed by one document line per point.
            lines.append('{"index":{"_id":' + str(doc_id) + ', "_type":"_doc"}}\n')
            lines.append('{"location":"' + str(point.latitude)
                         + ',' + str(point.longitude) + '"}\n')
        es_points = ''.join(lines)

        # Single-argument call form: valid on both Python 2 and Python 3.
        print('Add points "' + es_points + '"')
        self.es.bulk(index=self.index, body=es_points)

    def aggregate_points(self, precision):
        """Return the geohash-grid buckets of the whole index.

        Args:
            precision: Geohash precision passed to the aggregation.

        Returns:
            The ``buckets`` list of the "large-grid" aggregation.
        """
        result = self.es.search(
            index=self.index,
            body={
                "aggregations": {
                    "large-grid": {
                        "geohash_grid": {
                            "field": self.field,
                            "precision": precision
                        }
                    }
                }
            },
            params={"size": 0}
        )

        return result['aggregations']['large-grid']['buckets']

    def aggregate_search_with_filter(self, precision, geo_bounds, operators, kw_range, connection_types):
        """Aggregate the documents inside a bounding box, facet by facet.

        Four facets are computed (geohash grid, operators, power range and
        connection types). Every facet is restricted by the filters of
        the *other* facets but not by its own, except the geohash grid,
        which is restricted by all of them.

        Args:
            precision: Geohash precision of the grid aggregation.
            geo_bounds: Object providing ``top()``, ``left()``,
                ``bottom()`` and ``right()``.
            operators: Operator ids to filter on; falsy for no filter.
            kw_range: Object with ``min`` and ``max``; falsy for no filter.
            connection_types: Connection type ids to filter on; falsy for
                no filter.

        Returns:
            An ``AggregateResult`` built from the geohash counts, the
            operator counts, a ``Range`` of the minimum and maximum power
            and the connection type counts.
        """
        geo_hash_filters = []
        operators_filters = []
        kw_range_filters = []
        connection_types_filters = []

        if operators:
            operators_filter = {
                "terms": {
                    "OperatorID": operators
                }
            }

            geo_hash_filters.append(operators_filter)
            kw_range_filters.append(operators_filter)
            connection_types_filters.append(operators_filter)

        if kw_range:
            kw_range_filter = {
                "range": {
                    "Connections.PowerKW": {"gte": kw_range.min, "lte": kw_range.max}
                }
            }

            geo_hash_filters.append(kw_range_filter)
            operators_filters.append(kw_range_filter)
            connection_types_filters.append(kw_range_filter)

        if connection_types:
            connection_types_filter = {
                "terms": {
                    "Connections.ConnectionTypeID": connection_types
                }
            }

            operators_filters.append(connection_types_filter)
            geo_hash_filters.append(connection_types_filter)
            kw_range_filters.append(connection_types_filter)

        # Filter out nonsense values, so highest max below 1000KW
        kw_range_filters.append({"range": {"Connections.PowerKW": {"lte": 1000}}})

        geo_hash_filter = GeoElasticSearch._get_filter_from_list(geo_hash_filters)
        operator_values_filter = GeoElasticSearch._get_filter_from_list(operators_filters)
        kw_range_values_filter = GeoElasticSearch._get_filter_from_list(kw_range_filters)
        connection_types_values_filter = GeoElasticSearch._get_filter_from_list(connection_types_filters)

        result = self.es.search(
            index=self.index,
            body={
                "aggregations": {
                    "zoomed-in": {
                        "filter": {
                            "geo_bounding_box": {
                                self.field: {
                                    "top_left": str(geo_bounds.top()) + ", " + str(geo_bounds.left()),
                                    "bottom_right": str(geo_bounds.bottom()) + ", " + str(geo_bounds.right())
                                }
                            }
                        },
                        "aggregations": {
                            "zoom1": {
                                "filter": geo_hash_filter,
                                "aggregations": {
                                    "geohash_entry": {
                                        "geohash_grid": {
                                            "field": self.field,
                                            "precision": precision
                                        }
                                    }
                                }
                            },
                            "operators": {
                                "filter": operator_values_filter,
                                "aggregations": {
                                    "operators-filtered": {
                                        "terms": {
                                            "field": "OperatorID",
                                            "size": 50
                                        }
                                    }
                                }
                            },
                            "missing-operators": {
                                "missing": {"field": "OperatorID"}
                            },
                            "power-kw-min": {
                                "filter": kw_range_values_filter,
                                "aggregations": {
                                    "power-kw-min-filtered": {
                                        "min": {"field": "Connections.PowerKW"}
                                    }
                                }
                            },
                            "power-kw-max": {
                                "filter": kw_range_values_filter,
                                "aggregations": {
                                    "power-kw-max-filtered": {
                                        "max": {"field": "Connections.PowerKW"}
                                    }
                                }
                            },
                            "connector-types": {
                                "filter": connection_types_values_filter,
                                "aggregations": {
                                    "connector-types-filtered": {
                                        "terms": {
                                            "field": "Connections.ConnectionTypeID",
                                            "size": 50
                                        }
                                    }
                                }
                            },
                        }
                    }
                }
            },
            params={"size": 0})

        geo_hash_to_count = GeoElasticSearch._get_geo_hash_to_count(result)
        operator_to_count = GeoElasticSearch._operator_to_count(result)
        connection_type_to_count = GeoElasticSearch._connection_type_to_count(result)

        zoomed_in = result['aggregations']['zoomed-in']
        kw_min = zoomed_in['power-kw-min']['power-kw-min-filtered']['value']
        kw_max = zoomed_in['power-kw-max']['power-kw-max-filtered']['value']

        return AggregateResult(
            geo_hash_to_count,
            operator_to_count,
            Range(kw_min, kw_max),
            connection_type_to_count)

    @staticmethod
    def _get_filter_from_list(filters):
        """Combine a list of filter clauses into one query clause.

        Args:
            filters: List of query clause dicts (possibly empty).

        Returns:
            ``{"match_all": {}}`` for an empty list, the clause itself for
            a single entry, otherwise a ``bool``/``must`` clause holding
            all of them.
        """
        if not filters:
            # Must be a dict: a bare {"match_all"} would be a set literal,
            # which cannot be serialised into the request body.
            return {"match_all": {}}

        if len(filters) == 1:
            return filters[0]

        return {
            "bool": {
                "must": list(filters)
            }
        }

    @staticmethod
    def _get_geo_hash_to_count(result):
        """Map every geohash bucket key of ``result`` to its document count."""
        buckets = result['aggregations']['zoomed-in']['zoom1']['geohash_entry']['buckets']

        geo_hash_to_count = {}
        for bucket in buckets:
            geo_hash_to_count[bucket['key']] = bucket['doc_count']

        return geo_hash_to_count

    @staticmethod
    def _operator_to_count(es_result):
        """List the operator buckets of ``es_result`` as id/count dicts.

        When the "missing-operators" aggregation counted any documents, a
        final entry holding only "count" (no "id") is appended for them.
        """
        zoomed_in = es_result['aggregations']['zoomed-in']

        result = GeoElasticSearch._buckets_to_list(
            zoomed_in['operators']['operators-filtered']['buckets'])

        missing_count = int(zoomed_in['missing-operators']['doc_count'])
        if missing_count > 0:
            result.append({
                "count": missing_count
            })

        return result

    @staticmethod
    def _connection_type_to_count(es_result):
        """List the connection type buckets of ``es_result`` as id/count dicts."""
        buckets = es_result['aggregations']['zoomed-in']['connector-types']['connector-types-filtered']['buckets']

        return GeoElasticSearch._buckets_to_list(buckets)

    @staticmethod
    def _buckets_to_list(buckets):
        """Convert terms buckets into ``{"id": key, "count": doc_count}`` dicts."""
        return [
            {"id": bucket['key'], "count": bucket['doc_count']}
            for bucket in buckets
        ]