Esempio n. 1
0
 def __init__(self,
              index=None,
              doc_type=None,
              es_host=None,
              _use_hg38=False):
     """Set up the ES client, the accepted query options and scroll sizing.

     Args:
         index: Index name to use; ``config.ES_INDEX_NAME`` is used when
             this is falsy.
         doc_type: Document type to use; ``config.ES_DOC_TYPE`` is used
             when this is falsy.
         es_host: Passed unchanged to ``get_es``.
         _use_hg38: Stored as ``self._hg38``.

     Raises:
         MVScrollSetupError: If ``_total_scroll_size`` is not an exact
             multiple of the value returned by ``get_number_of_shards()``.
     """
     self._es = get_es(es_host)
     self._index = index or config.ES_INDEX_NAME
     self._doc_type = doc_type or config.ES_DOC_TYPE
     self._allowed_options = [
         '_source', 'start', 'from_', 'size', 'sort', 'explain', 'version',
         'facets', 'fetch_all', 'jsonld'
     ]  # , 'host']
     self._scroll_time = '1m'
     self._total_scroll_size = 1000  # Total number of hits to return per scroll batch
     self._hg38 = _use_hg38
     self._jsonld = False
     # Read the JSON-LD context through a context manager so the file handle
     # is closed right away instead of whenever the garbage collector runs.
     with open(config.JSONLD_CONTEXT_PATH, 'r') as context_file:
         self._context = json.load(context_file)
     # Ask for the shard count once so the divisibility check, the division
     # and the error message are all based on the same value.
     number_of_shards = self.get_number_of_shards()
     if self._total_scroll_size % number_of_shards != 0:
         raise MVScrollSetupError(
             "_total_scroll_size of {} can't be "
             "divided evenly among {} shards.".format(
                 self._total_scroll_size, number_of_shards))
     # Total hits per shard per scroll batch
     self._scroll_size = int(self._total_scroll_size / number_of_shards)
Esempio n. 2
0
 def __init__(self, index=None, doc_type=None, es_host=None, step=5000):
     """Create the ES client, an ``ESIndexer`` on the same index, and the source DB handle.

     Args:
         index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
         doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
         es_host: Forwarded to both ``get_es`` and ``ESIndexer``.
         step: Stored as ``self.step``.
     """
     self._es = get_es(es_host)

     if index:
         self._index = index
     else:
         self._index = config.ES_INDEX_NAME

     if doc_type:
         self._doc_type = doc_type
     else:
         self._doc_type = config.ES_DOC_TYPE

     # Point the indexer at the same index this object resolved above.
     indexer = ESIndexer(es_host=es_host)
     indexer._index = self._index
     self._esi = indexer

     self._src = get_src_db()
     self.step = step
Esempio n. 3
0
 def __init__(self, index=None, doc_type=None, es_host=None):
     """Create the ES client and record the index, doc type and allowed options.

     Args:
         index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
         doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
         es_host: Passed unchanged to ``get_es``.
     """
     self._es = get_es(es_host)
     self._index = index if index else config.ES_INDEX_NAME
     self._doc_type = doc_type if doc_type else config.ES_DOC_TYPE
     # Option names kept in ``self._allowed_options``, one per line.
     self._allowed_options = [
         '_source',
         'start',
         'from_',
         'size',
         'sort',
         'explain',
         'version',
         'facets',
     ]
Esempio n. 4
0
 def __init__(self, index=None, doc_type=None, es_host=None):
     """Set up the ES client, the accepted query options and scroll sizing.

     Args:
         index: Index name to use; ``config.ES_INDEX_NAME`` is used when
             this is falsy.
         doc_type: Document type to use; ``config.ES_DOC_TYPE`` is used
             when this is falsy.
         es_host: Passed unchanged to ``get_es``.

     Raises:
         MVScrollSetupError: If ``_total_scroll_size`` is not an exact
             multiple of the value returned by ``get_number_of_shards()``.
     """
     self._es = get_es(es_host)
     self._index = index or config.ES_INDEX_NAME
     self._doc_type = doc_type or config.ES_DOC_TYPE
     self._allowed_options = ['_source', 'start', 'from_', 'size',
                              'sort', 'explain', 'version', 'facets', 'fetch_all', 'jsonld', 'host']
     self._scroll_time = '1m'
     self._total_scroll_size = 1000   # Total number of hits to return per scroll batch
     # Ask for the shard count once so the divisibility check, the division
     # and the error message are all based on the same value.
     number_of_shards = self.get_number_of_shards()
     if self._total_scroll_size % number_of_shards != 0:
         raise MVScrollSetupError(
             "_total_scroll_size of {} can't be "
             "divided evenly among {} shards.".format(
                 self._total_scroll_size, number_of_shards))
     # Total hits per shard per scroll batch
     self._scroll_size = int(self._total_scroll_size / number_of_shards)
Esempio n. 5
0
#http://www.elasticsearch.org/guide/reference/query-dsl/boosting-query.html

import types
import json
import re
import time
from utils.common import is_int, timesofar
from utils.es import get_es
from pyes.exceptions import NotFoundException
from pyes.utils import make_path
from pyes.query import MatchAllQuery, StringQuery
from config import ES_INDEX_NAME, ES_INDEX_TYPE
#from pyelasticsearch import ElasticSearch

#es0 = ElasticSearch('http://su02:9200/')
# Object returned by get_es(), created once when this module is imported;
# ESQuery.__init__ below stores it as each instance's ``conn``.
es = get_es()

def is_int(s):
    """Return True if ``int(s)`` succeeds, otherwise False.

    Values that ``int()`` rejects because of their type (for example
    ``None`` or a list) yield False rather than raising ``TypeError``.

    NOTE: this definition shadows the ``is_int`` imported from
    ``utils.common`` at the top of this module.
    """
    try:
        int(s)
    except (TypeError, ValueError):
        # ValueError: unparsable string; TypeError: unsupported input type.
        return False
    return True

def dummy_model(es, res):
    """Return ``res`` unchanged; ``es`` is accepted but not used."""
    return res

class ESQuery:
    """Query helper whose connection is the module-level ``es`` object."""

    def __init__(self):
        """Store the module-level ``es`` object as ``self.conn``."""
        self.conn = es
Esempio n. 6
0
 def __init__(self, index=None, doc_type=None, es_host=None):
     """Create the ES client and record the index, doc type and allowed options.

     Args:
         index: Index name; ``config.ES_INDEX_NAME`` is used when falsy.
         doc_type: Document type; ``config.ES_DOC_TYPE`` is used when falsy.
         es_host: Passed unchanged to ``get_es``.
     """
     self._es = get_es(es_host)

     if index:
         self._index = index
     else:
         self._index = config.ES_INDEX_NAME

     if doc_type:
         self._doc_type = doc_type
     else:
         self._doc_type = config.ES_DOC_TYPE

     # Option names kept in ``self._allowed_options``.
     self._allowed_options = [
         '_source', 'start', 'from_', 'size',
         'sort', 'explain', 'version', 'facets',
     ]