def __init__(self, index=None, doc_type=None, es_host=None, _use_hg38=False):
    """Set up the Elasticsearch client, target index/type and scroll sizes.

    Args:
        index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
        doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
        es_host: Forwarded to ``get_es`` to obtain the client.
        _use_hg38: Stored as ``self._hg38``.

    Raises:
        MVScrollSetupError: If ``_total_scroll_size`` is not an exact
            multiple of the value returned by ``get_number_of_shards()``.
    """
    self._es = get_es(es_host)
    self._index = index or config.ES_INDEX_NAME
    self._doc_type = doc_type or config.ES_DOC_TYPE
    # NOTE: 'host' is not accepted here (it was commented out of this list).
    self._allowed_options = [
        '_source', 'start', 'from_', 'size', 'sort', 'explain', 'version',
        'facets', 'fetch_all', 'jsonld',
    ]
    self._scroll_time = '1m'
    # Total number of hits to return per scroll batch
    self._total_scroll_size = 1000
    self._hg38 = _use_hg38
    self._jsonld = False

    # Use a context manager so the context file handle is always closed.
    with open(config.JSONLD_CONTEXT_PATH, 'r') as context_file:
        self._context = json.load(context_file)

    # Look the shard count up once so the divisibility check, the division
    # and the error message all use the same value.
    n_shards = self.get_number_of_shards()
    if self._total_scroll_size % n_shards != 0:
        raise MVScrollSetupError(
            "_total_scroll_size of {} can't be ".format(self._total_scroll_size) +
            "divided evenly among {} shards.".format(n_shards))
    # Total hits per shard per scroll batch (exact: divisibility checked above)
    self._scroll_size = self._total_scroll_size // n_shards
def __init__(self, index=None, doc_type=None, es_host=None, step=5000):
    """Create the Elasticsearch client, an ``ESIndexer`` and the source DB handle.

    Args:
        index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
        doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
        es_host: Forwarded to both ``get_es`` and ``ESIndexer``.
        step: Stored as ``self.step`` (presumably a batch size -- confirm
            against the methods that read it).
    """
    es_client = get_es(es_host)
    self._es = es_client

    self._index = index if index else config.ES_INDEX_NAME
    self._doc_type = doc_type if doc_type else config.ES_DOC_TYPE

    # The indexer is pointed at the same index this object resolved above.
    indexer = ESIndexer(es_host=es_host)
    self._esi = indexer
    indexer._index = self._index

    self._src = get_src_db()
    self.step = step
def __init__(self, index=None, doc_type=None, es_host=None):
    """Bind the Elasticsearch client and resolve the index and doc type.

    Args:
        index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
        doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
        es_host: Forwarded to ``get_es`` to obtain the client.
    """
    self._es = get_es(es_host)
    self._index = index if index else config.ES_INDEX_NAME
    self._doc_type = doc_type if doc_type else config.ES_DOC_TYPE
    # Option names kept in ``self._allowed_options``.
    self._allowed_options = [
        '_source',
        'start',
        'from_',
        'size',
        'sort',
        'explain',
        'version',
        'facets',
    ]
def __init__(self, index=None, doc_type=None, es_host=None):
    """Set up the Elasticsearch client, target index/type and scroll sizes.

    Args:
        index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
        doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
        es_host: Forwarded to ``get_es`` to obtain the client.

    Raises:
        MVScrollSetupError: If ``_total_scroll_size`` is not an exact
            multiple of the value returned by ``get_number_of_shards()``.
    """
    self._es = get_es(es_host)
    self._index = index or config.ES_INDEX_NAME
    self._doc_type = doc_type or config.ES_DOC_TYPE
    self._allowed_options = [
        '_source', 'start', 'from_', 'size', 'sort', 'explain', 'version',
        'facets', 'fetch_all', 'jsonld', 'host',
    ]
    self._scroll_time = '1m'
    # Total number of hits to return per scroll batch
    self._total_scroll_size = 1000

    # Look the shard count up once so the divisibility check, the division
    # and the error message all use the same value.
    n_shards = self.get_number_of_shards()
    if self._total_scroll_size % n_shards != 0:
        raise MVScrollSetupError(
            "_total_scroll_size of {} can't be ".format(self._total_scroll_size) +
            "divided evenly among {} shards.".format(n_shards))
    # Total hits per shard per scroll batch (exact: divisibility checked above)
    self._scroll_size = self._total_scroll_size // n_shards
#http://www.elasticsearch.org/guide/reference/query-dsl/boosting-query.html
import types
import json
import re
import time

from utils.common import is_int, timesofar
from utils.es import get_es
from pyes.exceptions import NotFoundException
from pyes.utils import make_path
from pyes.query import MatchAllQuery, StringQuery
from config import ES_INDEX_NAME, ES_INDEX_TYPE

#from pyelasticsearch import ElasticSearch
#es0 = ElasticSearch('http://su02:9200/')
# Module-level connection; ESQuery.__init__ stores it as ``self.conn``.
es = get_es()


def is_int(s):
    """Return True if ``int(s)`` succeeds, False otherwise.

    NOTE: this definition replaces the ``is_int`` imported from
    ``utils.common`` above for the rest of this module.
    """
    try:
        int(s)
    except (TypeError, ValueError):
        # TypeError covers inputs such as None, which int() rejects with a
        # different exception than a malformed string.
        return False
    return True


def dummy_model(es, res):
    """Pass-through model: ignore ``es`` and return ``res`` unchanged."""
    return res


class ESQuery:
    def __init__(self):
        """Attach the module-level ``es`` connection as ``self.conn``."""
        #self.conn0 = es0
        self.conn = es
def __init__(self, index=None, doc_type=None, es_host=None):
    """Bind the Elasticsearch client and resolve the index and doc type.

    Args:
        index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
        doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
        es_host: Forwarded to ``get_es`` to obtain the client.
    """
    client = get_es(es_host)
    self._es = client

    # Fall back to the configured defaults for any falsy argument.
    self._index = index if index else config.ES_INDEX_NAME
    self._doc_type = doc_type if doc_type else config.ES_DOC_TYPE

    option_names = [
        '_source', 'start', 'from_', 'size',
        'sort', 'explain', 'version', 'facets',
    ]
    self._allowed_options = option_names