예제 #1
0
 def __make_es(self):
     """Build an Elasticsearch client from ``Config.host``.

     ``Config.host`` may be one host string such as ``'127.0.0.1:9200'`` or
     a list/tuple of such strings.

     Returns:
         The ``ES`` client created for the configured host(s).

     Raises:
         TypeError: if ``Config.host`` is neither a str nor a list/tuple.
     """
     hosts = Config.host
     if isinstance(hosts, str):
         return ES([hosts])
     if isinstance(hosts, (list, tuple)):
         # Hand the hosts over as ONE list argument. Unpacking them with
         # ``*`` would spread them over the constructor's positional
         # parameters, so every host after the first would be misread.
         return ES(list(hosts))
     raise TypeError(
         "Query.HOST must be a str like '127.0.0.1:9200' or a list/tuple contains host str, "
         "but Your HOST is type %s" % type(hosts))
예제 #2
0
    def setup(self):
        """Connect to Elasticsearch and quieten the client's loggers.

        Reads ``host``, ``port`` and ``connection_timeout`` from
        ``self.options``, stores the new client on ``self.engine`` and pings
        it. Failures are logged through ``self.logger`` instead of being
        propagated to the caller.
        """
        try:
            host = self.options.get("host")
            port = self.options.get("port")

            # ``%s`` for the port: it may be configured as a string (or be
            # missing), and a log-formatting error must not abort the setup.
            self.logger.debug("Connecting to elasticsearch at %s:%s", host,
                              port)

            self.engine = ES(
                [{
                    "host": host,
                    "port": port
                }],
                # sniff_on_start=True,
                # sniff_on_connection_fail=True,
                # sniffer_timeout=60,
                timeout=self.options.get("connection_timeout"),
            )

            if self.engine.ping():
                self.logger.debug("Connected to elasticsearch")
            else:
                # Report the failure only; also logging "Connected" after a
                # failed ping would be misleading.
                self.logger.error(
                    "Error connecting to elasticsearch at %s:%s", host, port)

            # Disable elasticsearch logging @FIXME maybe a mistake
            for name in ("elasticsearch", "elasticsearch.trace", "urllib3"):
                noisy = logging.getLogger(name)
                noisy.propagate = False
                noisy.setLevel(logging.CRITICAL)

        except Exception as e:
            self.logger.error("Error setting up Elasticsearch datastore: %s",
                              e)
    def test_size(self):
        """Query ``docker_stats*`` with ``size=10`` over the last hour.

        The returned dataframe must exist and hold between 0 and 10 rows.
        """
        global server, pwd

        print("==> test_size")

        node = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
        client = ES([node],
                    connection_class=RC,
                    http_auth=("user", pwd),
                    use_ssl=True,
                    verify_certs=False)
        print(client.info())

        window_start = datetime.datetime.now() - datetime.timedelta(hours=1)
        window_end = datetime.datetime.now()
        frame = es_helper.elastic_to_dataframe(client,
                                               index="docker_stats*",
                                               size=10,
                                               timestampfield="read",
                                               start=window_start,
                                               end=window_end)
        print(len(frame))

        self.assertTrue(frame is not None)
        self.assertTrue(0 <= len(frame) <= 10)
예제 #4
0
    def test_date_cols(self):
        """Check that every requested date column shows up in the dataframe.

        Three documents are indexed one after another into a scratch index
        (without a date, with ``date1``, with ``date2``). After each step the
        columns listed in ``datecolumns`` must be present in the result, even
        when no stored document carries them yet.

        The scratch index is removed afterwards, also when an assertion fails.
        """
        global server, pwd, user

        print("==> test_date_cols")

        host_params1 = {'host': server,
                        'port': 9200, 'use_ssl': True}  # RPINUM

        es = ES([host_params1], connection_class=RC, http_auth=(
            user, pwd), use_ssl=True, verify_certs=False)

        print(es.info())

        # Start from a clean slate; the index may be left over from an
        # earlier run.
        try:
            es.indices.delete('test_date_cols')
        except elasticsearch.NotFoundError:
            pass

        try:
            doc = {
                'attr1': 'test'
            }
            es.index(index="test_date_cols", id="t1", doc_type='_doc', body=doc)

            # Give Elasticsearch time to make the document searchable.
            time.sleep(1)
            res = es_helper.elastic_to_dataframe(es, index="test_date_cols", datecolumns=["date1"])

            print(len(res))
            print(res.columns)

            self.assertIn("date1", res.columns)

            doc = {
                'attr1': 'test',
                'date1': datetime.now()
            }
            es.index(index="test_date_cols", id="t2", doc_type='_doc', body=doc)

            time.sleep(1)
            res = es_helper.elastic_to_dataframe(es, index="test_date_cols", datecolumns=["date1"])

            self.assertIn("date1", res.columns)

            doc = {
                'attr1': 'test',
                'date2': datetime.now()
            }
            es.index(index="test_date_cols", id="t3", doc_type='_doc', body=doc)

            time.sleep(1)
            res = es_helper.elastic_to_dataframe(es, index="test_date_cols", datecolumns=["date1", "date2"])

            self.assertIn("date1", res.columns)
            self.assertIn("date2", res.columns)
        finally:
            # Always drop the scratch index so a failing assertion does not
            # leave test data behind on the cluster.
            try:
                es.indices.delete('test_date_cols')
            except elasticsearch.NotFoundError:
                pass
예제 #5
0
    def test_elastic_to_panda(self):
        """
        Read the last hour of ``docker_stats*`` into a dataframe.

        The result must exist and contain at least one row.
        """
        global server, pwd, user

        print("==> test_elastic_to_panda")

        node = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
        client = ES([node],
                    connection_class=RC,
                    http_auth=(user, pwd),
                    use_ssl=True,
                    verify_certs=False)
        print(client.info())

        frame = es_helper.elastic_to_dataframe(
            client,
            index="docker_stats*",
            scrollsize=1000,
            datecolumns=["read"],
            timestampfield="read",
            start=datetime.now() - timedelta(hours=1),
            end=datetime.now())
        frame["read"].describe()
        print(len(frame))

        self.assertTrue(frame is not None)
        self.assertTrue(len(frame) > 0)
예제 #6
0
 def conn(self):
     """Return a new Elasticsearch client built from ``self.conf``.

     The ``host``, ``port`` and ``db`` settings become the node's host,
     port and URL prefix respectively.
     """
     node = {
         "host": self.conf.get('host'),
         "port": self.conf.get('port'),
         "url_prefix": self.conf.get('db'),
     }
     return ES([node])
예제 #7
0
    def __init__(self, es_host, es_port, es_index, doc_type):
        """Create the client and remember the target index and doc type.

        Args:
            es_host: host of the Elasticsearch node.
            es_port: port of the Elasticsearch node.
            es_index: index name; handed to ``_check_index_exist`` and then
                stored on the instance.
            doc_type: document type stored on the instance.

        Raises:
            Exception: whatever ``ES(...)`` raises while building the client.
        """
        # Let construction errors propagate. ``__init__`` may only return
        # None, so signalling failure with a return value would raise an
        # unrelated TypeError and hide the real cause.
        self.es = ES([{"host": es_host, "port": es_port}])

        self._check_index_exist(es_index)
        self.index = es_index
        self.doc_type = doc_type
예제 #8
0
def es_connect(host='192.168.56.101', port=9200):
    """Create an Elasticsearch client for ``host``:``port``.

    Args:
        host: address of the node; defaults to the development VM address.
        port: port of the node.

    Returns:
        The ``ES`` client.

    If the client cannot be created, the error is printed and the process
    exits with status 1.
    """
    try:
        return ES([{'host': host, 'port': port}])
    except Exception as ex:
        print('Error: ', ex)
        # Non-zero status so shells and callers can tell failure from success.
        sys.exit(1)
예제 #9
0
def main():
    """Parse the command line; with ``-d`` create the ``ithome`` index locally."""
    parser = argparse.ArgumentParser()
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('-d', '--development', action='store_true')
    options = parser.parse_args()

    if not options.development:
        return

    client = ES([{'host': '127.0.0.1', 'port': '9200'}])
    client.indices.create(index='ithome')
예제 #10
0
 def __init__(self):
     """Initialise the base class and connect to the configured ES node.

     The client is stored on ``self.conn`` and the base URL of one of its
     connections on ``self.base_url``.
     """
     super(Elasticsearch, self).__init__()
     self.search = True
     node = {
         "host": self.conf.get('host'),
         "port": int(self.conf.get('port')),
     }
     self.conn = ES([node], connection_class=RequestsHttpConnection)
     connection = self.conn.transport.get_connection()
     self.base_url = connection.base_url
예제 #11
0
def autocomplete(request, string):
    """Return up to 8 movie titles matching the prefix ``string`` as JSON.

    Matches are scored by a script combining the text score with the
    ``rating`` and ``no_ratings`` fields. Anything but a GET request is
    answered with a 404 page.
    """
    if request.method != 'GET':
        return HttpResponseNotFound('<h1>Page not found</h1>')

    client = ES()
    rating_script = " doc['rating'].value / 2 * _score * Math.log(1 + 3* doc['no_ratings'].value )  "
    body = {
        "query": {
            "function_score": {
                "query": {"match_phrase_prefix": {"title": string}},
                "script_score": {"script": rating_script},
            }
        },
        "_source": ["title", "_score"],
        "size": 8,
    }
    hits = client.search(index='csfd', doc_type='movie', body=body)["hits"]['hits']
    titles = [hit['_source']['title'] for hit in hits]
    return JsonResponse(titles, safe=False)
예제 #12
0
 def __init__(self, connection, es_index='covid_tweets'):
     """Connect to the AWS-hosted Elasticsearch endpoint with signed requests.

     ``connection`` supplies ``ACCESS_KEY``, ``SECRET_KEY`` and ``AWS_HOST``;
     ``es_index`` is stored on the instance as ``self.es_index``.
     """
     credentials = AWS4Auth(connection['ACCESS_KEY'],
                            connection['SECRET_KEY'],
                            'us-east-2',
                            'es')
     endpoint = {'host': connection['AWS_HOST'], 'port': 443}
     self.es = ES(
         hosts=[endpoint],
         http_auth=credentials,
         use_ssl=True,
         verify_certs=True,
         connection_class=RequestsHttpConnection,
         timeout=60,
     )
     self.es_index = es_index
예제 #13
0
def searchInEs(des, tar_index, tar_docType, keyName, num_res):
    """Run a ``match`` query for ``des`` on field ``keyName``.

    Searches ``tar_index``/``tar_docType`` on the local node, asks for
    ``num_res`` results and returns the list of hits.
    """
    client = ES([{"host": "127.0.0.1", "port": 9200}])

    # Build the request body.
    match_clause = {keyName: des}
    request = {"query": {"match": match_clause}}

    # Search in Elasticsearch.
    response = client.search(
        index=tar_index,
        doc_type=tar_docType,
        body=request,
        size=num_res,
    )
    hits = response["hits"]["hits"]
    return hits
예제 #14
0
    def test_empty_attr(self):
        """Check that attributes absent from a row are not stored for it.

        Three rows, each with a different single attribute, are written from
        one dataframe; the stored document ``t1`` must contain ``attr1`` only.
        """
        global server, pwd, user

        print("==> test_empty_attr")

        node = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
        client = ES([node],
                    connection_class=RC,
                    http_auth=(user, pwd),
                    use_ssl=True,
                    verify_certs=False)
        print(client.info())

        # Remove any index left over from an earlier run.
        try:
            client.indices.delete('test_empty_attr')
        except elasticsearch.NotFoundError:
            pass

        rows = [
            {'_id': 't1', 'attr1': 'test'},
            {'_id': 't2', 'attr2': 'test'},
            {'_id': 't3', 'attr3': 'test'},
        ]
        frame = pd.DataFrame(rows)

        INDEX_NAME = 'test_empty_attr'
        frame['_index'] = INDEX_NAME

        es_helper.dataframe_to_elastic(client, frame)
        time.sleep(1)

        rec = client.get(index=INDEX_NAME, id='t1', doc_type='doc')
        stored = rec['_source']

        self.assertTrue("attr1" in stored)
        self.assertTrue("attr2" not in stored)
        self.assertTrue("attr3" not in stored)
예제 #15
0
def send_to_ttp(district, today, ttp_ip, ttp_port, source, doc_type):
    """
    Index a district's freshly produced aggregation in the TTP's ES instance.

    The target index is named ``<district>-aggrevents-<today>``; that name is
    returned once the document has been sent.
    """
    target_index = '{0}-aggrevents-{1}'.format(district, today)

    userlog.info("Sending the aggregated events to TTP's ES instance")
    endpoint_line = 'TTP ip:port : {0}:{1}'.format(ttp_ip, ttp_port)
    userlog.info(endpoint_line)
    target_line = 'Index: {0} \t doc_type: {1} \t '.format(target_index, doc_type)
    userlog.info(target_line)

    client = ES(hosts=ttp_ip, port=ttp_port, timeout=30)
    client.index(index=target_index, doc_type=doc_type, body=source)

    return target_index
예제 #16
0
    def similar_action_no(actions, state_key, side, sqlite_cursor):
        """Choose an action index using rated states similar to ``state_key``.

        The decompressed state key is full-text searched in Elasticsearch.
        For every similar state that has a stored quality JSON, its actions
        are walked from the highest to the lowest positive Q-value, and the
        first one that is also available in ``actions`` wins.

        Args:
            actions: candidate actions for the current state.
            state_key: compressed key of the current state.
            side: ``'b'`` selects the blue state type, anything else the red
                one; ``'r'`` additionally reverses the similar state's map.
            sqlite_cursor: cursor used to read the ``t_quality`` table.

        Returns:
            An index into ``actions``; a random index when the search fails
            or no similar state offers a usable action.
        """
        # decomp key
        decomp_state_key = KoreanChess.decompress_state_key(state_key)
        # full text search for similar state key on elasticsearch
        es = ES('52.79.135.2:80')
        # ``==`` rather than ``is``: equal strings are not guaranteed to be
        # the same object, so an identity test can silently pick the wrong type.
        doc_type = 't_blue_state' if side == 'b' else 't_red_state'
        result = es.search('i_irelia_state', doc_type,
                           {
                               "query": {"match": {
                                   "state": decomp_state_key}}
                           })

        if not result or result['_shards']['failed'] > 0:
            return random.randint(0, len(actions) - 1)

        actions_map = {
            KoreanChess.build_action_key(act): i
            for i, act in enumerate(actions)
        }

        for item in result['hits']['hits']:
            similar_state = KoreanChess.compress_state_key(item['_source']['state'])
            # Bound parameter instead of string concatenation, so the key
            # can never alter the SQL statement.
            # NOTE(review): the key is compressed a second time for the
            # lookup - confirm that compress_state_key is idempotent.
            sqlite_cursor.execute(
                "SELECT quality_json FROM t_quality WHERE state_key=?",
                (KoreanChess.compress_state_key(similar_state),))

            q_json = sqlite_cursor.fetchone()
            if not q_json or q_json[0] == '0':
                continue

            similar_state_map = KoreanChess.convert_state_map(similar_state)
            if side == 'r':
                similar_state_map = KoreanChess.reverse_state_map(similar_state_map)
            similar_state_actions = KoreanChess.get_actions(similar_state_map, side)

            q_values = json.loads(q_json[0])

            # Best Q-value first; ties broken by action number (as a string).
            q_values = sorted(q_values.items(), key=lambda x: (-x[1], x[0]))

            for action_no_text, q_value in q_values:
                # Sorted descending, so nothing useful follows a non-positive value.
                if q_value <= 0:
                    break
                # get action no
                sim_action = similar_state_actions[int(action_no_text)]
                sim_action_key = KoreanChess.build_action_key(sim_action)
                if sim_action_key in actions_map:
                    return actions_map[sim_action_key]

        return random.randint(0, len(actions) - 1)
예제 #17
0
def searchInES(des, index, keyname, resNum):
    """Run a ``term`` query for ``des`` on field ``keyname`` and return the hits.

    Both the request body and the raw response are printed.
    """
    client = ES([{"host": "localhost", "port": 9200}])
    # query body
    term_query = {"query": {"term": {keyname: des}}}
    print(term_query)
    # search
    response = client.search(
        index=index,
        doc_type="_doc",
        body=term_query,
        size=resNum,
    )
    print(response)
    return response['hits']['hits']
예제 #18
0
def search_entity(q, search_size=200):
    """Search the ``sentence`` field for ``q`` and weight the entities found.

    Args:
        q: query text placed inside ``sentence:(...)``.
        search_size: maximum number of hits requested.

    Returns:
        A list of ``(entity, weight)`` tuples, where the weight is the hit's
        score divided evenly among the entities of that hit.
    """
    from itertools import product
    from elasticsearch import Elasticsearch as ES
    es = ES('localhost:9200')
    result = es.search(q='sentence:({})'.format(q), size=search_size)
    result = result['hits']['hits']

    companies = []
    for hit in result:
        entities = hit['_source']['entity']
        # A hit without entities contributes nothing; skipping it also
        # avoids dividing the score by zero.
        if not entities:
            continue
        score = hit['_score']

        companies.extend(product(entities, [score / len(entities)]))
    return companies
예제 #19
0
def hodnotenie(request, number, string, gte, lte):
    """Render one result page of title matches within a rating range.

    Args:
        request: the incoming request, passed on to ``render``.
        number: zero-based page number; each page holds 10 results.
        string: text matched against ``titles``.
        gte: lower bound of the ``rating`` range.
        lte: upper bound of the ``rating`` range.

    Returns:
        The rendered ``search.html`` with the movies of this page and the
        links to at most 11 pages.
    """
    query = {
        "_source": ["title", "rating", "creators.Režie:", "_score", "content.plot"],
        "query": {"bool": {"must": [{"match": {"titles": string}},
                                    {"range": {"rating": {"gte": gte, "lte": lte}}}]}},
        "size": 10,
        "from": int(number) * 10,
    }
    es = ES()
    result_es = es.search(index='csfd', doc_type='movie', body=query)["hits"]

    # Elasticsearch 7+ reports the total as {"value": n, "relation": ...};
    # older servers return a bare number.
    total = result_es['total']
    if isinstance(total, dict):
        total = total.get('value', 0)
    pages = min(int(total) // 10, 10)

    movies = [Model(hit, 0) for hit in result_es["hits"]]
    page_links = [
        'search@strana-' + str(x) + '@' + string + '@hodnotenie@' + str(gte) + '@' + str(lte)
        for x in range(pages + 1)
    ]
    return render(request, 'search.html', {'movies': movies, 'pages': page_links})
예제 #20
0
    def __init__(self):
        """Set the search defaults: index, client, doc type and thresholds."""
        # Adjustable: the index to search in.
        self._index = "qa_data"

        # Adjustable: Elasticsearch server settings.
        local_node = {"host": "127.0.0.1", "port": 9200}
        self.es = ES([local_node])

        # Type of the documents to look for.
        self.doc_type = "qa"

        # Dimensionality of the sentence-vector space.
        self.embedSize = 300

        # Score thresholds used for matching.
        self.min_score = self.min_sim = 0.4
예제 #21
0
def searchInEs(des, tar_index, tar_docType, keyName, num_res):
    """Run a ``match`` query for ``des`` on field ``keyName`` and return the hits.

    ``tar_docType`` is accepted but not used: the search always runs with
    ``doc_type="_doc"``. The raw response is printed before returning.
    """
    client = ES([{"host": "localhost", "port": 9200}])

    # Build the request body.
    request = {"query": {"match": {keyName: des}}}

    # Search in Elasticsearch.
    # Note: doc_type is deliberately pinned, to fit ES 7.0 which has no
    # document types; only the deploy version of the code does this. The
    # function parameter is kept so that as little as possible has to change.
    print("{+} es搜索函数被调用\n {+} 原始结果如下:")
    response = client.search(
        index=tar_index,
        doc_type="_doc",
        body=request,
        size=num_res,
    )
    print(response)
    return response["hits"]["hits"]
예제 #22
0
    def insert_state_key(state_key, is_red=False):
        """Store ``state_key`` for one side unless it is already indexed.

        Args:
            state_key: the state key to look up and, if absent, index.
            is_red: selects the red state type when true, the blue one
                otherwise.

        Returns:
            True when the key already exists. Otherwise whether the index
            call reported the document as created; a falsy response is
            returned unchanged.
        """
        side = 'red' if is_red else 'blue'
        doc_type = 't_%s_state' % side
        es = ES('52.79.135.2:80')
        result = es.search('i_irelia_state', doc_type, {
            "query": {
                "constant_score": {
                    "filter": {
                        "term": {
                            "state.keyword": state_key}
                    }
                }
            }
        })

        if result and 'hits' in result:
            total = result['hits']['total']
            # Elasticsearch 7+ reports {"value": n, "relation": ...} instead
            # of a bare number.
            if isinstance(total, dict):
                total = total.get('value', 0)
            if total > 0:
                return True

        result = es.index('i_irelia_state', doc_type, {"state": state_key})
        if not result:
            return result
        # Older servers answer with a boolean "created"; newer ones drop it
        # and report "result": "created" instead.
        return result.get('created') is True or result.get('result') == 'created'
예제 #23
0
    def __init__(self):
        """Configure the ES target and the MongoDB source of tagged cases."""
        # Adjustable: name of the index.
        self._index = "news_case"

        # Adjustable, though rarely needed: Elasticsearch server settings.
        local_node = {"host": "127.0.0.1", "port": 9200}
        self.es = ES([local_node])

        # Adjustable: document type.
        self.doc_type = "case"

        # No need to change: connect to MongoDB.
        mongo = MG()
        self.MGclient = mongo

        # Adjustable: name of the database.
        database = mongo.spider_data
        self.db = database

        # Adjustable: name of the collection.
        self.collect = database.tagged_case
예제 #24
0
    def __init__(self):
        """Configure the ES target and the MongoDB source of law data."""
        # Adjustable: name of the index.
        self._index = "law_data"

        # Adjustable, though rarely needed: Elasticsearch server settings.
        local_node = {"host": "127.0.0.1", "port": 9200}
        self.es = ES([local_node])

        # Adjustable: document type.
        self.doc_type = "line"

        # No need to change: connect to MongoDB.
        mongo = MG("mongodb://*****:*****@localhost:27017")
        self.MGclient = mongo

        # Adjustable: name of the database.
        database = mongo.spider_data
        self.db = database

        # Adjustable: name of the collection.
        self.collect = database.LAW
예제 #25
0
    def __init__(self):
        """Configure the ES target and the MongoDB source of encyclopedia abstracts."""
        # Adjustable: name of the index.
        self._index = "baike_data_abstract"

        # Adjustable, though rarely needed: Elasticsearch server settings.
        local_node = {"host": "localhost", "port": 9200}
        self.es = ES([local_node])

        # Adjustable: document type.
        self.doc_type = "knowledge"

        # No need to change: connect to MongoDB.
        mongo = MG("mongodb://*****:*****@localhost:27017")
        self.MGclient = mongo

        # Adjustable: name of the database.
        database = mongo.spider_data
        self.db = database

        # Adjustable: name of the collection.
        self.collect = database.baidu_baike_BIG
예제 #26
0
    def __init__(self, args):
        """
        Connect to the node and store the region-specific defaults.

        The process exits with status 1 when ``check_node_connection``
        reports that the node is not reachable.

        :param args: argparse.Namespace object
        """
        # setup node
        host, port = args.host, args.port
        self.es_node = ES([{"host": host, "port": port}])
        self.es_node_url = "http://%s:%d" % (host, port)

        # connect to node
        if not self.check_node_connection(self.es_node_url):
            sys.exit(1)

        # setup default configuration
        region = args.region
        self.region = region
        self.region_url = self.countries_url + ("?pRegion=%s" % region)
        self.index_name = "countries_index"
        self.doc_type = '%s_countries' % self.region
예제 #27
0
def search(env, message, size, start, q):
    """Search for the quoted ``message`` in one environment and queue the result.

    The query is limited to a fixed timestamp range and sorted by
    ``timestamp``. The tuple ``(response, Event())`` is put on ``q``.
    """
    ent = Event()
    client = ES([{'host': '10.168.169.51', 'port': 9200}])

    phrase_query = {'query_string': {'query': '"' + message + '"'}}
    # Restrict to the requested environment.
    environment_filter = {'term': {'environment': env}}
    time_window_filter = {
        'range': {
            'timestamp': {
                'from': '2016-11-18 16:00:00.000',
                'to': '2016-11-23 16:00:00.000',
            }
        }
    }
    # A regexp filter on full_message is not used: regexp does not support
    # Chinese, so Chinese text would never match.
    request_body = {
        'query': {'filtered': {'query': [phrase_query]}},
        'filter': {'and': [environment_filter, time_window_filter]},
    }

    resp = client.search(
        size=size,
        from_=start,
        sort="timestamp",
        fields=['timestamp', 'full_message', 'source'],
        body=request_body,
    )
    q.put((resp, ent))
예제 #28
0
def search(request, number, string, avg):
    """Render one result page of title matches, boosted by rating.

    Args:
        request: the incoming request, passed on to ``render``.
        number: zero-based page number; each page holds 10 results.
        string: text matched against ``titles``.
        avg: ``'1'`` restricts results to ratings of at least
            ``get_average()``; any other value uses a lower bound of 0.

    Returns:
        The rendered ``search.html`` with the movies of this page and the
        links to at most 11 pages.
    """
    avega = get_average() if avg == '1' else 0

    query = {
        "query": {"function_score": {
            "query": {"bool": {"must": [{"match": {"titles": string}},
                                        {"range": {"rating": {"gte": avega}}}]}},
            "field_value_factor": {"field": "rating", "modifier": "log1p", "factor": 0.1},
        }},
        "_source": ["title", "rating", "creators.Režie:", "_score", "content.plot"],
        "size": 10,
        "from": int(number) * 10,
    }
    es = ES()
    result_es = es.search(index='csfd', doc_type='movie', body=query)["hits"]

    # Elasticsearch 7+ reports the total as {"value": n, "relation": ...};
    # older servers return a bare number.
    total = result_es['total']
    if isinstance(total, dict):
        total = total.get('value', 0)
    pages = min(int(total) // 10, 10)

    movies = [Model(hit, 0) for hit in result_es["hits"]]
    page_links = [
        'search@strana-' + str(x) + '@' + string + '@' + avg
        for x in range(pages + 1)
    ]
    return render(request, 'search.html', {'movies': movies, 'pages': page_links})
예제 #29
0
    def __init__(self):
        """Configure the ES target and the MongoDB source of hand-made QA pairs."""
        # Adjustable: name of the index.
        self._index = "qa_data"

        # Adjustable, though rarely needed: Elasticsearch server settings.
        local_node = {"host": "localhost", "port": 9200}
        self.es = ES([local_node])

        # Adjustable: document type.
        self.doc_type = "qa"

        # No need to change: connect to MongoDB.
        mongo = MG("mongodb://*****:*****@localhost:27017")
        self.MGclient = mongo

        # Adjustable: name of the database.
        database = mongo.spider_data
        self.db = database

        #self.db.authenticate("reader","reader")
        # Adjustable: name of the collection.
        self.collect = database.qa_byHand
예제 #30
0
    def __connect(self):
        '''Private method used to connect to the ElasticSearch instance.

        Builds a client for ``self.host``/``self.port``, pings it, POSTs to
        the ``/login`` URL on the same host and port, and finally checks
        that a mapping can be fetched for ``self.index_name``.

        Returns:
            The ``ES`` client.

        Raises:
            OSError: if the ping fails.
            TransportError: re-raised from the mapping lookup; when its
                first argument is 403 or 404, the second argument is
                replaced with a more specific message first.
        '''
        es = ES(hosts=[{'host': self.host, 'port': self.port}])

        # checks if server exists
        if not es.ping():
            err = ('It appears that nothing is running at http://%s:%s' %
                   (self.host, self.port))
            raise OSError(err)

        # load the credentials file (if possible)


#         with file(self.cred_path) as cf:
#             username, password = [l.strip() for l in cf.readlines()][:2]
#         data = json.dumps({'username': username, 'password': password})
        # NOTE(review): the POST carries no payload and `resp` is never read
        # below, because the token handling is commented out. The call still
        # raises if the request fails or the reply is not valid JSON.
        url = 'http://%s:%s/login' % (self.host, self.port)
        resp = json.loads(requests.post(url).text)
        #         if resp['status'] == 200:
        #             self.auth_token = resp['token']
        #         else:
        #             self.auth_token = ''

        # checks if index exists
        try:
            es.indices.get_mapping(self.index_name)
        except TransportError as e:
            # presumably e.args[0] is the HTTP status code - TODO confirm
            if e.args[0] == 403:
                # Swap in a clearer message; args is a tuple, hence the
                # round trip through a list.
                err = list(e.args)
                err[1] = ('Credentials not valid for %s:%s/%s' %
                          (self.host, self.port, self.index_name))
                e.args = tuple(err)
            elif e.args[0] == 404:
                # NOTE(review): explicit finalizer call before re-raising;
                # what it releases is not visible here - confirm.
                self.__del__()
                err = list(e.args)
                err[1] = ('No index named "%s" is avaliable at %s:%s' %
                          (self.index_name, self.host, self.port))
                e.args = tuple(err)
            # Re-raise the same exception in every case, with the message
            # rewritten for 403/404.
            raise
        return es