コード例 #1
0
    def import_intent(self, inputFile):
        """Bulk-index query/response pairs into Elasticsearch.

        The target index ``ES_INDEX`` is created with an explicit mapping when
        it does not exist yet; an existing index is left untouched. Each row
        is sent as an ``index`` action whose ``_id`` is the md5 of its
        content, so importing the same row again targets the same document.

        :param inputFile: a ``pandas.DataFrame``, or a CSV source accepted by
            ``pandas.read_csv`` (see import_test.csv); it must provide the
            ``query_context`` and ``response`` columns
        :return: None
        """
        es = elasticsearch5.Elasticsearch(['localhost:9200'])
        index = ES_INDEX

        # Built as a dict so it can never be malformed JSON. The mapping is
        # keyed by ES_TYPE and uses the ``query_context`` field name so that
        # it actually applies to the documents produced by bulk_data() below.
        mapping = {
            'mappings': {
                ES_TYPE: {
                    'properties': {
                        'query_context': {
                            'type': 'text',
                            'analyzer': 'ik_max_word',
                            'search_analyzer': 'ik_max_word',
                        },
                        'response': {
                            'type': 'keyword',
                        },
                    },
                },
            },
        }

        # Create the index only if it is missing; never modify an existing one.
        if not es.indices.exists(index=index):
            # ignore=400 keeps the import going when the index was created
            # concurrently; any other rejection is reported, not hidden.
            result = es.indices.create(index=index, ignore=400, body=mapping)
            if isinstance(result, dict) and 'error' in result:
                print('Index creation failed', result['error'])

        def bulk_data(index_name, df):
            """Yield one bulk ``index`` action per row of ``df``."""
            for _, row in df.iterrows():
                source = {
                    'query_context': row['query_context'],
                    'response': row['response'],
                }
                # query_context + response together determine the _id.
                doc_id = hashlib.md5(
                    (source['query_context'] +
                     source['response']).encode('utf8')).hexdigest()

                yield {
                    '_op_type': 'index',
                    '_index': index_name,
                    '_type': ES_TYPE,
                    '_id': doc_id,
                    '_source': source,
                }

        if isinstance(inputFile, pd.DataFrame):
            df = inputFile
        else:
            # dtype=object keeps every cell as read, without type inference.
            df = pd.read_csv(inputFile, dtype=object)

        for success, info in parallel_bulk(client=es,
                                           actions=bulk_data(index, df),
                                           thread_count=16):
            if not success:
                print('Doc failed', info)
コード例 #2
0
ファイル: shared_elastic.py プロジェクト: unforensicate/plaso
  def _Connect(self):
    """Creates the Elasticsearch client and stores it in self._client.

    HTTP authentication is only configured when a username has been set;
    otherwise the client is created without credentials.
    """
    if self._username is None:
      credentials = None
    else:
      credentials = (self._username, self._password)

    hosts = [{'host': self._host, 'port': self._port}]
    self._client = elasticsearch5.Elasticsearch(hosts, http_auth=credentials)

    message = 'Connected to Elasticsearch server: {0:s} port: {1:d}.'.format(
        self._host, self._port)
    logger.debug(message)
コード例 #3
0
from kafka import KafkaConsumer
import elasticsearch5
import json
from datetime import datetime
import logging

# Consume JSON records from a Kafka topic and index each one into a
# date-suffixed Elasticsearch index.

# Log to a file; filemode 'w+' truncates the log every time the script starts.
logging.basicConfig(
    filename='/data/silk/log/consumer.log',
    filemode='w+',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# NOTE(review): the empty host strings and 'username'/'password' look like
# redacted placeholders -- fill in real values before running.
es = elasticsearch5.Elasticsearch(["", ""], http_auth=('username', 'password'))
# Consumer for the "sec-log-S0" topic in consumer group "group1", using
# SASL/PLAIN credentials over the SASL_PLAINTEXT protocol.
consumer = KafkaConsumer("sec-log-S0",
                         bootstrap_servers=['', '', ''],
                         group_id="group1",
                         sasl_plain_username='',
                         sasl_plain_password='',
                         security_protocol="SASL_PLAINTEXT",
                         sasl_mechanism='PLAIN')
for msg in consumer:
    # NOTE(review): this empty dict is overwritten on the very next line.
    es_value = dict()
    # Each message value is parsed as a JSON document.
    es_value = json.loads(msg.value)
    # Convert the time fields into datetimes so Elasticsearch accepts them.
    # UTC is used throughout to keep one consistent timeline; Kibana is
    # expected to apply the +8 offset when displaying.
    # NOTE(review): presumably start_time/end_time are epoch seconds -- confirm.
    es_value['start_time'] = datetime.utcfromtimestamp(es_value['start_time'])
    es_value['end_time'] = datetime.utcfromtimestamp(es_value['end_time'])
    # NOTE(review): the handler for this try is not visible in this excerpt.
    try:
        # The index name carries today's date as returned by datetime.now()
        # (local time, unlike the UTC fields above); no explicit document id
        # is supplied.
        es.index(index='sec-log-silk-' + datetime.now().strftime("%Y-%m-%d"),
                 doc_type='record',
                 body=es_value,
                 id=None)
コード例 #4
0
 def __enter__(self):
     """Return a new Elasticsearch client for ES_ADDRESS on context entry."""
     client = elasticsearch5.Elasticsearch([ES_ADDRESS])
     return client
コード例 #5
0
from datetime import datetime
import elasticsearch5

from elasticsearch5.client import IndicesClient

# Create the "foo" index with an explicit mapping for "task" documents,
# unless an index of that name already exists.
es = elasticsearch5.Elasticsearch()
es_index = IndicesClient(es)
if es_index.exists("foo"):
    print("foo already exists.")
else:
    # Plain string literal: the former ``ur`` prefix is a SyntaxError on
    # Python 3 and was never needed (no backslashes, ASCII only).
    es_index.create(index="foo",
                    body="""{
    "mappings": {
    "task": {
    "properties":{
        "owner":          {"type":"keyword"},
        "environment":    {"type":"keyword"},
        "ID":             {"type":"keyword"},
        "@timestamp":     {"type":"date", "format": "epoch_second"},
        "@process":       {"type":"integer"},
        "@db":            {"type":"integer"},
        "@wait":          {"type":"integer"},
        "status":         {"type":"keyword"}
      }}}}""")