예제 #1
0
def query_data(index, dls):
    """Run the query `dls` against `index` and return all matching hits.

    The first page is fetched with a scrolling search (1000 hits per page,
    one-minute scroll keep-alive) and further pages are pulled with the
    scroll API until a page comes back empty.

    Args:
        index: Name (or pattern) of the index to search.
        dls: Query body passed to the search call unchanged.

    Returns:
        A list with the raw hit dictionaries of every page, in the order they
        were returned. The list is empty (and 'empty!' is printed) when the
        first page has no hits.
    """
    hosts = ['192.168.5.11', '192.168.5.12', '192.168.5.14']
    es = Elasticsearch(hosts=hosts, port=9200)

    res = es.search(index=index,
                    scroll='1m',
                    timeout='3s',
                    size=1000,
                    body=dls)
    mdata = list(res["hits"]["hits"])
    if not mdata:
        print('empty!')

    scroll_id = res["_scroll_id"]
    try:
        # Page until an empty batch arrives instead of deriving a page count
        # from hits.total, which is not a plain number in every server version.
        has_more = bool(mdata)
        while has_more:
            res_scroll = es.scroll(scroll_id=scroll_id, scroll='1m')
            # Each response may carry a new scroll id; always continue with
            # the most recent one.
            scroll_id = res_scroll.get("_scroll_id", scroll_id)
            page = res_scroll["hits"]["hits"]
            mdata.extend(page)
            has_more = bool(page)
    finally:
        # Release the server-side scroll context rather than letting it
        # linger until the keep-alive expires.
        es.clear_scroll(scroll_id=scroll_id)

    return mdata
예제 #2
0
                hit['_index'], hit['_type'], hit['_id'],
                created_at.strftime('%Y-%m-%d'),
                hit['_source']['description'].split('\n')[0]))

    print('=' * 80)
    print()

# Get the 'elasticsearch.trace' logger, set its level to INFO and append its
# records to /tmp/es_trace.log.
# NOTE(review): presumably this is the client's request trace logger - confirm
# against the elasticsearch-py logging documentation.
tracer = logging.getLogger('elasticsearch.trace')
tracer.setLevel(logging.INFO)
tracer.addHandler(logging.FileHandler('/tmp/es_trace.log'))
# Instantiate the es client; connects to localhost:9200 by default.
es = Elasticsearch()

# Search the 'git' index without a query body and hand the response to
# print_hits.
print('Empty search:')
print_hits(es.search(index='git'))

print('Find commits that says "fix" without touching tests:')
result = es.search(
    index='git',
    doc_type='doc',
    body={
      'query': {
        'bool': {
          'must': {
            'match': {'description': 'fix'}
          },
          'must_not': {
            'term': {'files': 'test_elasticsearch'}
          }
        }
예제 #3
0
            }}
          ]
        }
    }
}


def _send_alert(M):
    """Send M as the "message" of an 'event.send' call.

    The event is tagged 'salt/<whoami>/slack', where `whoami` is a
    module-level name, and is dispatched through the function table of
    `alert.sminion`.
    """
    send_event = alert.sminion.functions['event.send']
    tag = 'salt/{}/slack'.format(whoami)
    payload = {"message": M}
    send_event(tag, payload)

# Ask for up to 1000 documents from the 'pathreats-*' indices, limited to the
# source fields listed in S and matching query Q.
giveme = es.search(index='pathreats-*', size=1000, _source=S, body=Q)

# Guard clause: with no hits there is nothing to process, so report and stop.
if not (giveme['hits']['total'] > 0):
  print("No data")
  exit(0)

# De-duplicate hits by a fingerprint of their serialized _source; the first
# document seen for a fingerprint is the one that is kept.
data = {}
for hit in giveme['hits']['hits']:
  fp = hash(json.dumps(hit['_source']))
  data.setdefault(fp, hit['_source'])

struct = {}
for K,V in data.iteritems():
  ship = "{} {} -> {}:{} {}".format(V['action'], V['src_ip'], V['dst_ip'], V['dst_port'], V['application'])
  if V['device'] not in struct:
예제 #4
0
class GeoElasticSearch:
    """Search and aggregation helper around one Elasticsearch index.

    On construction the index is created if it does not exist yet, with
    `field` mapped as a ``geo_point``. The methods then offer a place search,
    a bulk upload of points and geohash-grid aggregations over that field.
    """

    GEO_POINTS = 'geopoints'

    # (key inside the document's AddressInfo, key in the result entry) for the
    # address parts that are copied only when the document has them.
    _OPTIONAL_ADDRESS_KEYS = (
        ('StateOrProvince', 'stateOrProvince'),
        ('Town', 'town'),
        ('AddressLine1', 'addressLine1'),
    )

    def __init__(self, host, port, index_param, field):
        """Connect to `host`:`port` and make sure the index exists.

        Args:
            host: Host name of the Elasticsearch node.
            port: Port of the node; converted with ``str`` for the address.
            index_param: Name of the index all methods operate on.
            field: Name of the geo_point field used for the mapping and for
                the geo aggregations.
        """
        self.field = field
        self.index = index_param

        self.es = Elasticsearch([host + ':' + str(port)])

        if not self.es.indices.exists(index=index_param):
            self.es.indices.create(index=index_param, body={
                "mappings": {
                    "_doc": {
                        "properties": {
                            field: {
                                "type": "geo_point"
                            }
                        }
                    }
                }
            })

    def search(self, place):
        """Search the address fields for `place`.

        At most 30 hits are requested. Hits whose ``location`` is empty or
        not of the form ``"<lat>,<lon>"`` are left out of the result.

        Args:
            place: Text to match against the weighted address fields.

        Returns:
            ``{"results": [...]}`` where every entry has ``country``,
            ``title``, ``latitude`` and ``longitude`` and, when the document
            provides them, ``stateOrProvince``, ``town`` and ``addressLine1``.
        """
        es_result = self.es.search(
            index=self.index,
            body={
                "query": {
                    "multi_match": {
                        "query": place,
                        "fields": [
                            "AddressInfo.Country.Title^4",
                            "AddressInfo.StateOrProvince^3",
                            "AddressInfo.Town^2",
                            "AddressInfo.AddressLine1^1",
                            "AddressInfo.Title^1"
                        ]
                    }
                },
                "_source": [
                    "AddressInfo.Country.Title",
                    "AddressInfo.StateOrProvince",
                    "AddressInfo.Town",
                    "AddressInfo.AddressLine1",
                    "AddressInfo.Title",
                    "location"
                ],
                "size": 30
            }
        )

        # The hit list is simply empty when nothing matched, so the separate
        # total counter (whose representation differs between server
        # versions) does not need to be consulted.
        results = []
        for es_hit in es_result['hits']['hits']:
            result_entry = GeoElasticSearch._hit_to_result(es_hit)
            if result_entry is not None:
                results.append(result_entry)

        return {"results": results}

    @staticmethod
    def _hit_to_result(es_hit):
        """Convert one search hit to a result entry, or None without a usable location."""
        es_source = es_hit['_source']
        location = es_source['location']

        if location == '' or ',' not in location:
            return None

        split = location.split(',')
        address_info = es_source['AddressInfo']

        result_entry = {
            "country": address_info['Country']['Title'],
            "title": address_info['Title'],
            "latitude": float(split[0]),
            "longitude": float(split[1]),
        }

        # The membership test must use the same (capitalised) key that is read
        # afterwards; a differently-cased test would never copy the value.
        for source_key, result_key in GeoElasticSearch._OPTIONAL_ADDRESS_KEYS:
            if source_key in address_info:
                result_entry[result_key] = address_info[source_key]

        return result_entry

    def upload_points(self, geo_points):
        """Bulk-index every point of `geo_points` into the index.

        Document ids are the 1-based position of the point in
        ``geo_points.points``; each document stores ``"<latitude>,<longitude>"``
        under ``location``. The bulk payload is printed before it is sent.

        Args:
            geo_points: Object with a ``points`` sequence whose items expose
                ``latitude`` and ``longitude``.
        """
        lines = []
        for position, point in enumerate(geo_points.points, start=1):
            # Action line followed by the document line, one pair per point.
            lines.append('{"index":{"_id":' + str(position) + ', "_type":"_doc"}}\n')
            lines.append('{"location":"' + str(point.latitude)
                         + ',' + str(point.longitude) + '"}\n')
        es_points = ''.join(lines)

        # Parenthesised single-argument form prints the same text under both
        # the print statement and the print function.
        print('Add points "' + es_points + '"')
        self.es.bulk(index=self.index, body=es_points)

    def aggregate_points(self, precision):
        """Return the geohash-grid buckets of the whole index.

        Args:
            precision: Geohash precision passed to the ``geohash_grid``
                aggregation.

        Returns:
            The ``buckets`` list of the ``large-grid`` aggregation.
        """
        result = self.es.search(
            index=self.index,
            body={
                "aggregations": {
                    "large-grid": {
                        "geohash_grid": {
                            "field": self.field,
                            "precision": precision
                        }
                    }
                }
            },
            params={"size": 0}
        )

        return result['aggregations']['large-grid']['buckets']

    def aggregate_search_with_filter(self, precision, geo_bounds, operators, kw_range, connection_types):
        """Aggregate the documents inside `geo_bounds` under the given filters.

        Four filter lists are built, one per facet. Each facet is restricted
        by the *other* selections but not by its own (for example the operator
        counts ignore the `operators` selection), while the geohash grid is
        restricted by all of them. The power range facet is additionally
        capped at 1000 kW.

        Args:
            precision: Geohash precision for the grid aggregation.
            geo_bounds: Object exposing ``top()``, ``left()``, ``bottom()``
                and ``right()`` used for the bounding box.
            operators: Values for a ``terms`` filter on ``OperatorID``;
                skipped when falsy.
            kw_range: Object with ``min`` and ``max`` for a range filter on
                ``Connections.PowerKW``; skipped when falsy.
            connection_types: Values for a ``terms`` filter on
                ``Connections.ConnectionTypeID``; skipped when falsy.

        Returns:
            ``AggregateResult(geo_hash_to_count, operator_to_count,
            Range(kw_min, kw_max), connection_type_to_count)``.
        """
        geo_hash_filters = []
        operators_filters = []
        kw_range_filters = []
        connection_types_filters = []

        if operators:
            operators_filter = {
                "terms": {
                    "OperatorID": operators
                }
            }

            geo_hash_filters.append(operators_filter)
            kw_range_filters.append(operators_filter)
            connection_types_filters.append(operators_filter)

        if kw_range:
            kw_range_filter = {
                "range": {
                    "Connections.PowerKW": {"gte": kw_range.min, "lte": kw_range.max}
                }
            }

            geo_hash_filters.append(kw_range_filter)
            operators_filters.append(kw_range_filter)
            connection_types_filters.append(kw_range_filter)

        if connection_types:
            connection_types_filter = {
                "terms": {
                    "Connections.ConnectionTypeID": connection_types
                }
            }

            operators_filters.append(connection_types_filter)
            geo_hash_filters.append(connection_types_filter)
            kw_range_filters.append(connection_types_filter)

        # Filter out nonsense values, so highest max below 1000KW
        kw_range_filters.append({"range": {"Connections.PowerKW": {"lte": 1000}}})

        geo_hash_filter = GeoElasticSearch._get_filter_from_list(geo_hash_filters)
        operator_values_filter = GeoElasticSearch._get_filter_from_list(operators_filters)
        kw_range_values_filter = GeoElasticSearch._get_filter_from_list(kw_range_filters)
        connection_types_values_filter = GeoElasticSearch._get_filter_from_list(connection_types_filters)

        result = self.es.search(
            index=self.index,
            body={
                "aggregations": {
                    "zoomed-in": {
                        "filter": {
                            "geo_bounding_box": {
                                self.field: {
                                    "top_left": str(geo_bounds.top()) + ", " + str(geo_bounds.left()),
                                    "bottom_right": str(geo_bounds.bottom()) + ", " + str(geo_bounds.right())
                                }
                            }
                        },
                        "aggregations": {
                            "zoom1": {
                                "filter": geo_hash_filter,
                                "aggregations": {
                                    "geohash_entry": {
                                        "geohash_grid": {
                                            "field": self.field,
                                            "precision": precision
                                        }
                                    }
                                }
                            },
                            "operators": {
                                "filter": operator_values_filter,
                                "aggregations": {
                                    "operators-filtered": {
                                        "terms": {
                                            "field": "OperatorID",
                                            "size": 50
                                        }
                                    }
                                }
                            },
                            "missing-operators": {
                                "missing": {"field": "OperatorID"}
                            },
                            "power-kw-min": {
                                "filter": kw_range_values_filter,
                                "aggregations": {
                                    "power-kw-min-filtered": {
                                        "min": {"field": "Connections.PowerKW"}
                                    }
                                }
                            },
                            "power-kw-max": {
                                "filter": kw_range_values_filter,
                                "aggregations": {
                                    "power-kw-max-filtered": {
                                        "max": {"field": "Connections.PowerKW"}
                                    }
                                }
                            },
                            "connector-types": {
                                "filter": connection_types_values_filter,
                                "aggregations": {
                                    "connector-types-filtered": {
                                        "terms": {
                                            "field": "Connections.ConnectionTypeID",
                                            "size": 50
                                        }
                                    }
                                }
                            },
                        }
                    }
                }
            },
            params={"size": 0})

        geo_hash_to_count = GeoElasticSearch._get_geo_hash_to_count(result)
        operator_to_count = GeoElasticSearch._operator_to_count(result)
        connection_type_to_count = GeoElasticSearch._connection_type_to_count(result)

        zoomed_in = result['aggregations']['zoomed-in']
        kw_min = zoomed_in['power-kw-min']['power-kw-min-filtered']['value']
        kw_max = zoomed_in['power-kw-max']['power-kw-max-filtered']['value']

        return AggregateResult(
            geo_hash_to_count,
            operator_to_count,
            Range(kw_min, kw_max),
            connection_type_to_count)

    @staticmethod
    def _get_filter_from_list(filters):
        """Combine `filters` into a single query clause.

        Returns a ``match_all`` query for an empty list, the filter itself
        for a single entry, and a ``bool``/``must`` of all entries otherwise.
        """
        if not filters:
            # Must be a mapping: a bare {"match_all"} is a Python set, which
            # is neither a valid query nor JSON-serializable.
            return {"match_all": {}}

        if len(filters) == 1:
            return filters[0]

        return {"bool": {"must": list(filters)}}

    @staticmethod
    def _get_geo_hash_to_count(result):
        """Map each geohash bucket key of the ``zoom1`` grid to its doc count."""
        buckets = result['aggregations']['zoomed-in']['zoom1']['geohash_entry']['buckets']

        return {bucket['key']: bucket['doc_count'] for bucket in buckets}

    @staticmethod
    def _operator_to_count(es_result):
        """Return ``{"id", "count"}`` entries per operator bucket.

        When documents without an ``OperatorID`` exist, a final entry holding
        only ``count`` (no ``id``) is appended for them.
        """
        zoomed_in = es_result['aggregations']['zoomed-in']
        result = GeoElasticSearch._buckets_to_list(
            zoomed_in['operators']['operators-filtered']['buckets'])

        count = int(zoomed_in['missing-operators']['doc_count'])
        if count > 0:
            result.append({
                "count": count
            })

        return result

    @staticmethod
    def _connection_type_to_count(es_result):
        """Return ``{"id", "count"}`` entries per connection-type bucket."""
        buckets = es_result['aggregations']['zoomed-in']['connector-types']['connector-types-filtered']['buckets']

        return GeoElasticSearch._buckets_to_list(buckets)

    @staticmethod
    def _buckets_to_list(buckets):
        """Turn terms-aggregation buckets into ``{"id": key, "count": doc_count}`` dicts."""
        return [
            {"id": bucket['key'], "count": bucket['doc_count']}
            for bucket in buckets
        ]