from elasticsearch import Elasticsearch


class esdata(object):
    # ES = ['127.0.0.1:9200']
    # Query for looking up specific documents.
    query = {
        "query": {
            "bool": {
                "must": [{
                    "match": {
                        "name": 'a'
                    }
                }]
            }
        },
        "size": 100
    }

    def __init__(self, ES):
        self._add = ES
        # Create the ES client.
        self.es = Elasticsearch(
            self._add,
            # Sniff the cluster for nodes on startup.
            sniff_on_start=True,
            # Refresh the node list when a node connection fails.
            sniff_on_connection_fail=True,
            # Refresh node info every 60 seconds.
            sniffer_timeout=60)

    def saveData(self, records):
        doc = self.createdoc(records)
        t = self.es.bulk(index="index1", doc_type='type1', body=doc)
        print('insert es successful?', t)
        # r = self.es.search(index="index1", doc_type='type1', body=self.query)
        # print('es result is:', r)

    # Convert rows of (id, name, age) values into a bulk request body:
    # an action line followed by a document line per record.
    def createdoc(self, records):
        doc = []
        for dup in records:
            doc.append(dict(index={}))
            doc.append(dict(zip(['id', 'name', 'age'], dup)))
        print('doc is:', doc)
        return doc
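# A minimal usage sketch for esdata, assuming a local single-node cluster at
# 127.0.0.1:9200 (the commented-out default above). The sample rows are
# illustrative (id, name, age) tuples; "index1"/"type1" come from saveData.
es_client = esdata(['127.0.0.1:9200'])
es_client.saveData([
    (1, 'alice', 30),
    (2, 'bob', 25),
])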
import logging
from datetime import datetime

from elasticsearch import Elasticsearch, TransportError
# BaseMetricsBackend is the abstract base class from the py-timeexecution
# package that this backend plugs into.
from time_execution.backends.base import BaseMetricsBackend

logger = logging.getLogger(__name__)


class ElasticsearchBackend(BaseMetricsBackend):
    def __init__(self,
                 hosts=None,
                 index="metrics",
                 doc_type="metric",
                 index_pattern="{index}-{date:%Y.%m.%d}",
                 *args,
                 **kwargs):
        # Assign these in the backend as they are needed when writing metrics
        # to elasticsearch
        self.index = index
        self.doc_type = doc_type
        self.index_pattern = index_pattern

        # setup the client
        self.client = Elasticsearch(hosts=hosts, *args, **kwargs)

        # ensure the index is created
        try:
            self._setup_index()
        except TransportError as exc:
            logger.error('index setup error %r', exc)

        try:
            self._setup_mapping()
        except TransportError as exc:
            logger.error('mapping setup error %r', exc)

    def get_index(self):
        return self.index_pattern.format(index=self.index, date=datetime.now())

    def _setup_index(self):
        # ignore=400 tolerates "index already exists" responses
        return self.client.indices.create(self.get_index(), ignore=400)

    def _setup_mapping(self):
        return self.client.indices.put_template(
            name="timeexecution-{}".format(self.index),
            body={
                "template": "{}*".format(self.index),
                "mappings": {
                    self.doc_type: {
                        "dynamic_templates": [{
                            "strings": {
                                "mapping": {
                                    "type": "keyword"
                                },
                                "match_mapping_type": "string"
                            }
                        }],
                        "_source": {
                            "enabled": True
                        },
                        "properties": {
                            "name": {
                                "type": "keyword"
                            },
                            "timestamp": {
                                "type": "date"
                            },
                            "hostname": {
                                "type": "keyword"
                            },
                            "value": {
                                "type": "float"
                            },
                            "origin": {
                                "type": "keyword"
                            },
                        }
                    },
                },
                "settings": {
                    "number_of_shards": "1",
                    "number_of_replicas": "1",
                },
            })

    def write(self, name, **data):
        """
        Write the metric to elasticsearch

        Args:
            name (str): The name of the metric to write
            data (dict): Additional data to store with the metric
        """
        data["name"] = name
        if "timestamp" not in data:
            data["timestamp"] = datetime.utcnow()

        try:
            self.client.index(
                index=self.get_index(),
                doc_type=self.doc_type,
                id=None,
                body=data)
        except TransportError as exc:
            logger.warning('writing metric %r failure %r', data, exc)

    def bulk_write(self, metrics):
        """
        Write multiple metrics to elasticsearch in one request

        Args:
            metrics (list): data with mappings to send to elasticsearch
        """
        actions = []
        index = self.get_index()
        for metric in metrics:
            actions.append({'index': {'_index': index, '_type': self.doc_type}})
            actions.append(metric)
        try:
            self.client.bulk(actions)
        except TransportError as exc:
            logger.warning('bulk_write metrics %r failure %r', metrics, exc)
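# A minimal usage sketch for ElasticsearchBackend, assuming a local cluster;
# the metric name and field values are illustrative, not part of the backend.
backend = ElasticsearchBackend(hosts=['127.0.0.1:9200'], index='metrics')
backend.write('api.response_time', value=0.123, hostname='web-1')
backend.bulk_write([
    {'name': 'api.response_time', 'value': 0.098,
     'timestamp': datetime.utcnow()},
    {'name': 'api.response_time', 'value': 0.142,
     'timestamp': datetime.utcnow()},
])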
from elasticsearch import Elasticsearch

# AggregateResult and Range are project-local result/value classes assumed to
# be defined elsewhere in this codebase.


class GeoElasticSearch:
    GEO_POINTS = 'geopoints'

    def __init__(self, host, port, index_param, field):
        self.field = field
        self.index = index_param
        self.es = Elasticsearch([host + ':' + str(port)])
        if not self.es.indices.exists(index=index_param):
            self.es.indices.create(index=index_param, body={
                "mappings": {
                    "_doc": {
                        "properties": {
                            field: {
                                "type": "geo_point"
                            }
                        }
                    }
                }
            })

    def search(self, place):
        es_result = self.es.search(
            index=self.index,
            body={
                "query": {
                    "multi_match": {
                        "query": place,
                        "fields": [
                            "AddressInfo.Country.Title^4",
                            "AddressInfo.StateOrProvince^3",
                            "AddressInfo.Town^2",
                            "AddressInfo.AddressLine1^1",
                            "AddressInfo.Title^1"
                        ]
                    }
                },
                "_source": [
                    "AddressInfo.Country.Title",
                    "AddressInfo.StateOrProvince",
                    "AddressInfo.Town",
                    "AddressInfo.AddressLine1",
                    "AddressInfo.Title",
                    "location"
                ],
                "size": 30
            }
        )
        results = []
        # ES 6.x returns hits.total as a plain integer.
        total_hits = int(es_result['hits']['total'])
        if total_hits > 0:
            es_hits = es_result['hits']['hits']
            for es_hit in es_hits:
                es_source = es_hit['_source']
                location = es_source['location']
                if location != '' and ',' in location:
                    split = location.split(',')
                    address_info = es_source['AddressInfo']
                    result_entry = {
                        "country": address_info['Country']['Title'],
                        "title": address_info['Title'],
                        "latitude": float(split[0]),
                        "longitude": float(split[1])
                    }
                    if 'StateOrProvince' in address_info:
                        result_entry["stateOrProvince"] = address_info['StateOrProvince']
                    if 'Town' in address_info:
                        result_entry["town"] = address_info['Town']
                    if 'AddressLine1' in address_info:
                        result_entry["addressLine1"] = address_info['AddressLine1']
                    results.append(result_entry)
        return {"results": results}

    def upload_points(self, geo_points):
        # Build a newline-delimited bulk body of "lat,lon" location strings.
        es_points = ''
        for i in range(0, len(geo_points.points)):
            es_points += '{"index":{"_id":'
            es_points += str(i + 1) + ', "_type":"_doc"}}\n'
            point = geo_points.points[i]
            es_points += '{"location":"' + str(point.latitude)
            es_points += ',' + str(point.longitude) + '"}\n'
        print('Add points "' + es_points + '"')
        self.es.bulk(index=self.index, body=es_points)

    def aggregate_points(self, precision):
        result = self.es.search(
            index=self.index,
            body={
                "aggregations": {
                    "large-grid": {
                        "geohash_grid": {
                            "field": self.field,
                            "precision": precision
                        }
                    }
                }
            },
            params={"size": 0}
        )
        buckets = result['aggregations']['large-grid']['buckets']
        return buckets

    def aggregate_search_with_filter(self, precision, geo_bounds, operators,
                                     kw_range, connection_types):
        # Each aggregation gets the filters for the *other* facets, so a
        # facet's own selection does not narrow its own value list.
        geo_hash_filters = []
        operators_filters = []
        kw_range_filters = []
        connection_types_filters = []
        if operators and len(operators) > 0:
            operators_filter = {
                "terms": {
                    "OperatorID": operators
                }
            }
            geo_hash_filters.append(operators_filter)
            kw_range_filters.append(operators_filter)
            connection_types_filters.append(operators_filter)
        if kw_range:
            kw_range_filter = {
                "range": {
                    "Connections.PowerKW": {
                        "gte": kw_range.min,
                        "lte": kw_range.max
                    }
                }
            }
            geo_hash_filters.append(kw_range_filter)
            operators_filters.append(kw_range_filter)
            connection_types_filters.append(kw_range_filter)
        if connection_types and len(connection_types) > 0:
            connection_types_filter = {
                "terms": {
                    "Connections.ConnectionTypeID": connection_types
                }
            }
            operators_filters.append(connection_types_filter)
            geo_hash_filters.append(connection_types_filter)
            kw_range_filters.append(connection_types_filter)
        # Filter out nonsense values, so highest max below 1000KW
        kw_range_filters.append({
            "range": {
                "Connections.PowerKW": {
                    "lte": 1000
                }
            }
        })

        geo_hash_filter = GeoElasticSearch._get_filter_from_list(geo_hash_filters)
        operator_values_filter = GeoElasticSearch._get_filter_from_list(operators_filters)
        kw_range_values_filter = GeoElasticSearch._get_filter_from_list(kw_range_filters)
        connection_types_values_filter = GeoElasticSearch._get_filter_from_list(connection_types_filters)

        result = self.es.search(
            index=self.index,
            body={
                "aggregations": {
                    "zoomed-in": {
                        "filter": {
                            "geo_bounding_box": {
                                self.field: {
                                    "top_left": str(geo_bounds.top()) + ", " + str(geo_bounds.left()),
                                    "bottom_right": str(geo_bounds.bottom()) + ", " + str(geo_bounds.right())
                                }
                            }
                        },
                        "aggregations": {
                            "zoom1": {
                                "filter": geo_hash_filter,
                                "aggregations": {
                                    "geohash_entry": {
                                        "geohash_grid": {
                                            "field": self.field,
                                            "precision": precision
                                        }
                                    }
                                }
                            },
                            "operators": {
                                "filter": operator_values_filter,
                                "aggregations": {
                                    "operators-filtered": {
                                        "terms": {
                                            "field": "OperatorID",
                                            "size": 50
                                        }
                                    }
                                }
                            },
                            "missing-operators": {
                                "missing": {
                                    "field": "OperatorID"
                                }
                            },
                            "power-kw-min": {
                                "filter": kw_range_values_filter,
                                "aggregations": {
                                    "power-kw-min-filtered": {
                                        "min": {
                                            "field": "Connections.PowerKW"
                                        }
                                    }
                                }
                            },
                            "power-kw-max": {
                                "filter": kw_range_values_filter,
                                "aggregations": {
                                    "power-kw-max-filtered": {
                                        "max": {
                                            "field": "Connections.PowerKW"
                                        }
                                    }
                                }
                            },
                            "connector-types": {
                                "filter": connection_types_values_filter,
                                "aggregations": {
                                    "connector-types-filtered": {
                                        "terms": {
                                            "field": "Connections.ConnectionTypeID",
                                            "size": 50
                                        }
                                    }
                                }
                            },
                        }
                    }
                }
            },
            params={"size": 0})
        geo_hash_to_count = GeoElasticSearch._get_geo_hash_to_count(result)
        operator_to_count = GeoElasticSearch._operator_to_count(result)
        connection_type_to_count = GeoElasticSearch._connection_type_to_count(result)
        kw_min = result['aggregations']['zoomed-in']['power-kw-min']['power-kw-min-filtered']['value']
        kw_max = result['aggregations']['zoomed-in']['power-kw-max']['power-kw-max-filtered']['value']
        return AggregateResult(
            geo_hash_to_count,
            operator_to_count,
            Range(kw_min, kw_max),
            connection_type_to_count)

    @staticmethod
    def _get_filter_from_list(filters):
        num_filters = len(filters)
        if num_filters == 0:
            # Match everything when there is nothing to filter on.
            result_filter = {"match_all": {}}
        elif num_filters == 1:
            result_filter = filters[0]
        else:
            # Combine several filters with a bool/must query.
            result_filter = {"bool": {"must": list(filters)}}
        return result_filter

    @staticmethod
    def _get_geo_hash_to_count(es_result):
        buckets = es_result['aggregations']['zoomed-in']['zoom1']['geohash_entry']['buckets']
        hash_to_count = {}
        for bucket in buckets:
            geo_hash = bucket['key']
            count = bucket['doc_count']
            hash_to_count[geo_hash] = count
        return hash_to_count

    @staticmethod
    def _operator_to_count(es_result):
        buckets = es_result['aggregations']['zoomed-in']['operators']['operators-filtered']['buckets']
        result = GeoElasticSearch._buckets_to_list(buckets)
        missing = es_result['aggregations']['zoomed-in']['missing-operators']
        count = int(missing['doc_count'])
        if count > 0:
            result.append({"count": count})
        return result

    @staticmethod
    def _connection_type_to_count(es_result):
        buckets = es_result['aggregations']['zoomed-in']['connector-types']['connector-types-filtered']['buckets']
        return GeoElasticSearch._buckets_to_list(buckets)

    @staticmethod
    def _buckets_to_list(buckets):
        result = []
        for bucket in buckets:
            operator_id = bucket['key']
            count = bucket['doc_count']
            result.append({"id": operator_id, "count": count})
        return result
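# A minimal usage sketch for GeoElasticSearch, assuming a local cluster. The
# GeoPoint/GeoPoints namedtuples are hypothetical stand-ins for the project's
# real point container; upload_points only needs a `.points` list whose items
# expose `.latitude` and `.longitude`.
from collections import namedtuple

GeoPoint = namedtuple('GeoPoint', ['latitude', 'longitude'])
GeoPoints = namedtuple('GeoPoints', ['points'])

geo_search = GeoElasticSearch('127.0.0.1', 9200,
                              GeoElasticSearch.GEO_POINTS, 'location')
geo_search.upload_points(GeoPoints(points=[
    GeoPoint(latitude=52.52, longitude=13.405),    # Berlin
    GeoPoint(latitude=48.8566, longitude=2.3522),  # Paris
]))
# Bucket the uploaded points into a coarse geohash grid (precision 3).
for bucket in geo_search.aggregate_points(precision=3):
    print(bucket['key'], bucket['doc_count'])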