def _get_ids_worker_by_taxid(args):
    """Worker: fetch all document ``_id`` values matching one taxid.

    ``args`` is a 3-tuple ``(es_kwargs, taxid, step)`` so the function can be
    mapped over a process pool; ``step`` caps the page size of the query.
    Returns the list of matching ids; asserts the result set was complete.
    """
    # Imports are kept function-local so pool workers import lazily.
    from utils.es import ESIndexer
    from pyes import TermQuery

    es_kwargs, taxid, step = args
    query = TermQuery()
    query.add('taxid', taxid)
    query.fields = []     # id-only: suppress source fields
    query.size = step
    indexer = ESIndexer(**es_kwargs)
    result = indexer.conn.search(query)
    id_list = [hit['_id'] for hit in result]
    # Guard against a truncated page: everything must fit in one request.
    assert len(id_list) == result.total
    return id_list
def testGetCountBySearch(self):
    """count() with a term query should find exactly one 'joe'."""
    fixtures = [
        ({"name": "Joe Tester"}, 1),
        ({"name": "Bill Baloney"}, 2),
    ]
    for body, doc_id in fixtures:
        self.conn.index(body, "test-index", "test-type", doc_id)
    self.conn.refresh(["test-index"])
    query = TermQuery("name", "joe")
    result = self.conn.count(query, indexes=["test-index"])
    self.assertResultContains(result, {'count': 1})
def find_term(self, name, value, size=10, index=None):
    """Search for documents where field *name* equals *value*.

    Returns the search result, or None (implicitly) when no connection
    has been configured. Falls back to ``self.index`` when *index* is
    not given.
    """
    if not self.connection:
        return
    term_query = TermQuery(name, value)
    target_index = index if index else self.index
    return self.connection.search(query=Search(term_query, size=size),
                                  indices=target_index)
def test_TermQuery(self):
    """Every TermQuery variant (with/without boost) matches one doc."""
    queries = [
        TermQuery("name", "joe"),
        TermQuery("name", "joe", 3),      # numeric boost
        TermQuery("name", "joe", "3"),    # string boost
        TermQuery("value", 43.32),        # float-valued term
    ]
    for query in queries:
        resultset = self.conn.search(query=query, indices=["test-index"])
        self.assertEquals(resultset.total, 1)
def test_TermQuery(self):
    """Term query on 'parsedtext' yields two hits with a known max score."""
    query = TermQuery("parsedtext", "joe")
    raw = self.conn.search(query=query, indexes="test-index")
    self.dump(raw)
    wrapped = ResultSet(raw)
    self.assertEquals(wrapped.total, 2)
    self.assertEquals(wrapped.max_score, 0.15342641000000001)
def _get_query_type(self, column, lookup_type, db_type, value): if db_type == "unicode": if (lookup_type == "exact" or lookup_type == "iexact"): q = TermQuery(column, value) return q if (lookup_type == "startswith" or lookup_type == "istartswith"): return RegexTermFilter(column, value) if (lookup_type == "endswith" or lookup_type == "iendswith"): return RegexTermFilter(column, value) if (lookup_type == "contains" or lookup_type == "icontains"): return RegexTermFilter(column, value) if (lookup_type == "regex" or lookup_type == "iregex"): return RegexTermFilter(column, value) if db_type == "datetime" or db_type == "date": if (lookup_type == "exact" or lookup_type == "iexact"): return TermFilter(column, value) #TermFilter, TermsFilter if lookup_type in ["gt", "gte", "lt", "lte", "range", "year"]: value['field'] = column return RangeQuery(ESRange(**value)) if lookup_type == "in": # terms = [TermQuery(column, val) for val in value] # if len(terms) == 1: # return terms[0] # return BoolQuery(should=terms) return TermsFilter(field=column, values=value) raise NotImplemented
def test_TermQuery(self):
    """Each TermQuery variant should report exactly one hit."""
    for query in (
        TermQuery("name", "joe"),
        TermQuery("name", "joe", 3),      # numeric boost
        TermQuery("name", "joe", "3"),    # string boost
        TermQuery("value", 43.32),        # float-valued term
    ):
        response = self.conn.search(query=query, indexes=["test-index"])
        self.assertEquals(response['hits']['total'], 1)
def find_BID_in_SBN(bid, es_server="localhost:9200"):
    """Look up a BID (converted to ICCU form) in the 'iccu' index.

    Returns the list of matching hits, or None when nothing matched.
    """
    sbn_bid = to_iccu_bid(bid)
    query = TermQuery('codiceIdentificativo', sbn_bid)
    connection = ES(server=es_server)
    hits = list(connection.search(query=query, indices="iccu"))
    return hits if hits else None
def testGetCountBySearch(self):
    """count() with a term query should find exactly one 'joe'."""
    for doc_id, person in ((1, "Joe Tester"), (2, "Bill Baloney")):
        self.conn.index({"name": person}, self.index_name,
                        self.document_type, doc_id)
    self.conn.refresh(self.index_name)
    result = self.conn.count(TermQuery("name", "joe"),
                             indices=self.index_name)
    self.assertResultContains(result, {'count': 1})
def post_verify_changes(self, changes):
    """Verify that a previously-applied changeset landed in the ES backend.

    ``changes`` is a dict with keys 'timestamp', 'add', 'update', 'delete'.
    Each check prints an OK/ERROR line; only the add/update timestamp check
    hard-fails (via assert). Nothing is returned.
    """
    target = GeneDocESBackend(self)
    _timestamp = changes['timestamp']
    # (timestamp, count) pairs; presumably newest-first — TODO confirm
    # against get_timestamp_stats' ordering.
    ts_stats = self.get_timestamp_stats()
    if changes['add'] or changes['update']:
        print('Verifying "add" and "update"...', end='')
        # The newest timestamp bucket must be this changeset's timestamp.
        assert ts_stats[0][0] == _timestamp, "{} != {}".format(ts_stats[0][0], _timestamp)
        _cnt = ts_stats[0][1]
        _cnt_add_update = len(changes['add']) + len(changes['update'])
        if _cnt == _cnt_add_update:
            print('...{}=={}...OK'.format(_cnt, _cnt_add_update))
        else:
            print('...{}!={}...ERROR!!!'.format(_cnt, _cnt_add_update))
    if changes['delete']:
        print('Verifying "delete"...', end='')
        # None of the deleted ids should still resolve to a document.
        _res = target.mget_from_ids(changes['delete'])
        _cnt = len([x for x in _res if x])
        if _cnt == 0:
            print('...{}==0...OK'.format(_cnt))
        else:
            print('...{}!=0...ERROR!!!'.format(_cnt))
    print("Verifying all docs have timestamp...", end='')
    # Timestamp-bucket counts must add up to the full document count.
    _cnt = sum([x[1] for x in ts_stats])
    _cnt_all = self.count()['count']
    if _cnt == _cnt_all:
        print('{}=={}...OK'.format(_cnt, _cnt_all))
    else:
        print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(_cnt_all, _cnt))
    print("Verifying all new docs have updated timestamp...")
    # Convert the changeset timestamp to epoch milliseconds as stored in
    # the ES '_timestamp' field.
    ts = time.mktime(_timestamp.utctimetuple())
    ts = ts - 8 * 3600  # convert to utc timestamp, here 8 hour difference is hard-coded (PST)
    ts = int(ts * 1000)
    q = TermQuery()
    q.add('_timestamp', ts)
    cur = self.doc_feeder(query=q, fields=[], step=10000)
    # The ids stamped with this timestamp must be exactly the added +
    # updated ids of the changeset.
    _li1 = sorted(changes['add'] + [x['_id'] for x in changes['update']])
    _li2 = sorted([x['_id'] for x in cur])
    if _li1 == _li2:
        print("{}=={}...OK".format(len(_li1), len(_li2)))
    else:
        print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(len(_li1), len(_li2)))
def test_TermQuery(self):
    """A field-restricted term search returns the attachment metadata."""
    requested_fields = [
        'attachment',
        'attachment.author',
        'attachment.title',
        'attachment.date',
    ]
    query = TermQuery("uuid", "1").search(fields=requested_fields)
    resultset = self.conn.search(query=query, indices=self.index_name)
    self.assertEquals(resultset.total, 1)
    author = resultset.hits[0]['fields']['attachment.author']
    self.assertEquals(author, u'Tika Developers')
def all_as_schedule(self):
    """
    Get the current schedule comprising entries built from Elastic data.
    """
    self.logger.debug("ControlPlaneScheduler: Fetching database schedule")
    schedule = {}
    services = self._elastic.search(TermQuery("_type", "service"))
    for service in services:
        # Tasks may be None on a service; treat that as "no tasks".
        for task in service.Tasks or ():
            entry = self.Entry(svc_model=service, task_model=task)
            schedule[entry.name] = entry
    return schedule
def test_TermQuery(self):
    """A field-restricted term search returns the attachment metadata."""
    requested_fields = [
        'attachment',
        'attachment.author',
        'attachment.title',
        'attachment.date',
    ]
    query = TermQuery("uuid", "1").search(fields=requested_fields)
    response = self.conn.search(query=query, indexes=["test-index"])
    self.assertEquals(response['hits']['total'], 1)
    author = response['hits']['hits'][0]['fields']['attachment.author']
    self.assertEquals(author, u'Tika Developers')
def test_es_sample_data(self):
    # Python 2 test: loads the 'contacts_es' sample set, rebuilds the index,
    # bulk-indexes the docs, then verifies two term queries (a string prefix
    # and a datetime field) return the expected records.
    # NOTE(review): "\T" is a literal backslash+T, not an escape — probably
    # intended to be "\n"; left untouched since it is a runtime string.
    print "\TestESPyes.test_es_sample_data"
    print """LOAD SAMPLE DOCS:\n"""
    resp = self.load_sample('contacts_es')
    assert resp['status'] == 200
    sample_docs = resp['response']['docs']
    document_type = 'Cnt'
    es = self.es
    index_name = es.__dict__['index_name']
    # Start from a clean index with the contact mapping applied.
    es.delete_index_if_exists(index_name)
    es.create_index(index_name)
    es.put_mapping(document_type, {'properties': models.esCnt}, [index_name])
    for doc in sample_docs:
        es.index(
            {
                "dNam": doc['dNam'],
                "oOn": doc['oOn'],
                "prefix": doc['prefix'],
                "parsedtext": doc['dNam']
            },
            index_name, doc['_c'], doc['_id'].__str__())
    es.default_indices = [index_name]
    # es.refresh(index_name)
    # Crude settle delay instead of an explicit refresh — see commented
    # line above.
    time.sleep(1)
    q = TermQuery("prefix", "dr")
    results = es.search(query=q)
    for r in results:
        assert r.prefix == 'Dr'
    q = TermQuery("oOn", datetime.datetime(2012, 10, 8, 13, 44, 33, 851000))
    results = es.search(query=q)
    for r in results:
        assert r.dNam == 'Einstein, Mr Larry Wayne'
def test_delete(self):
    """Verify deleted records are also removed from the remote ES server."""
    with Transaction().start(DB_NAME, USER, context=CONTEXT):
        self.create_defaults()
        users = self.create_users()
        # Two backlog entries pending; indexing drains them.
        self.assertEqual(len(self.IndexBacklog.search([])), 2)
        self.IndexBacklog.update_index()
        self.assertEqual(len(self.IndexBacklog.search([])), 0)
        time.sleep(2)  # wait for changes to reach search server
        conn = self.IndexBacklog._get_es_connection()
        hits = conn.search(query=TermQuery('rec_name', 'testuser'))
        self.assertEqual(len(hits), 1)
        # Deleting the users queues deletion entries in the backlog.
        self.User.delete(users)
        self.assertEqual(len(self.IndexBacklog.search([])), 2)
        self.IndexBacklog.update_index()
        time.sleep(2)  # wait for changes to reach search server
        hits = conn.search(query=TermQuery('rec_name', 'testuser'))
        self.assertEqual(len(hits), 0)
def test_force(self):
    """force_bulk() flushes queued bulk ops; a search then sees them."""
    self.conn.raise_on_bulk_item_failure = False
    fixtures = [
        (1, {"name": "Joe Tester", "parsedtext": "Joe Testere nice guy",
             "uuid": "11111", "position": 1}),
        (2, {"name": "Bill Baloney", "parsedtext": "Bill Testere nice guy",
             "uuid": "22222", "position": 2}),
        (3, {"name": "Bill Clinton", "parsedtext": "Bill is not nice guy",
             "uuid": "33333", "position": 3}),
    ]
    for doc_id, body in fixtures:
        self.conn.index(body, self.index_name, self.document_type,
                        doc_id, bulk=True)
    # Force the buffered bulk request and confirm all three items went out.
    bulk_result = self.conn.force_bulk()
    self.assertEquals(len(bulk_result['items']), 3)
    self.conn.refresh(self.index_name)
    resultset = self.conn.search(query=TermQuery("name", "bill"),
                                 indices=self.index_name)
    self.assertEquals(resultset.total, 2)
def test_initial(self):
    """Insert one 'Usr' doc via the Generic controller, then search it.

    Fix: removed the leftover debug statement ``x = 0`` at the end of the
    original — it had no effect.
    """
    es = self.es
    es_index_name = es.__dict__['index_name']
    generic = controllers.Generic(self.db, es)
    args = {
        'class_name': 'Usr',
        'docs': [{
            "uNam": "jkutz",
            "fNam": "Josh",
            "lNam": "Kutz",
            "gen": 'm',
            "emails": [{"email": "*****@*****.**"}]
        }]
    }
    rs = generic.post(**args)
    assert rs['status'] == 200 and rs['response']['total_inserted'] == 1
    doc = rs['response']['docs'][0]['doc']
    #time.sleep(1)
    es.refresh(es_index_name)
    q = TermQuery("dNam", "josh")
    results = es.search(query=q)
    # NOTE(review): nothing is asserted about `results` — consider asserting
    # the inserted document is actually found. Search kept for parity with
    # the original test's behavior.
def import_prov(conn, index, alias, prov_es_json): """Index PROV-ES concepts into ElasticSearch.""" # fix hadMember ids fix_hadMember_ids(prov_es_json) #print(json.dumps(prov_es_json, indent=2)) # import prefix = prov_es_json['prefix'] for concept in prov_es_json: if concept == 'prefix': continue elif concept == 'bundle': for bundle_id in prov_es_json['bundle']: try: found = len( conn.search(query=TermQuery("_id", bundle_id), indices=[alias])) except SearchPhaseExecutionException: found = 0 if found > 0: continue bundle_prov = copy.deepcopy(prov_es_json['bundle'][bundle_id]) bundle_prov['prefix'] = prefix bundle_doc = { 'identifier': bundle_id, 'prov_es_json': bundle_prov, } for b_concept in bundle_prov: if b_concept == 'prefix': continue bundle_doc[b_concept] = [] for i in bundle_prov[b_concept]: doc = copy.deepcopy(bundle_prov[b_concept][i]) prov_doc = copy.deepcopy(doc) doc['identifier'] = i doc['prov_es_json'] = {'prefix': prefix} doc['prov_es_json'].setdefault(b_concept, {})[i] = prov_doc if 'prov:type' in doc and isinstance( doc['prov:type'], types.DictType): doc['prov:type'] = doc['prov:type'].get('$', '') try: found = len( conn.search(query=TermQuery("_id", i), indices=[alias])) except SearchPhaseExecutionException: found = 0 if found > 0: pass else: conn.index(doc, index, b_concept, i) bundle_doc[b_concept].append(i) conn.index(bundle_doc, index, 'bundle', bundle_id) else: for i in prov_es_json[concept]: try: found = len( conn.search(query=TermQuery("_id", i), indices=[alias])) except SearchPhaseExecutionException: found = 0 if found > 0: continue docs = prov_es_json[concept][i] if not isinstance(docs, types.ListType): docs = [docs] for doc in docs: prov_doc = copy.deepcopy(doc) doc['identifier'] = i doc['prov_es_json'] = {'prefix': prefix} doc['prov_es_json'].setdefault(concept, {})[i] = prov_doc if 'prov:type' in doc and isinstance( doc['prov:type'], types.DictType): doc['prov:type'] = doc['prov:type'].get('$', '') conn.index(doc, index, concept, i)
def check_index_version():
    """Return the hit count for the sentinel 'dtc' document in the index.

    Bug fix: the original computed ``.total`` and discarded it, so the
    function always returned None and the check was useless; the count is
    now returned to the caller. (Uses the module-level ``conn`` and
    ``e_index``.)
    """
    q = TermQuery('dtc', 'dtc')
    return conn.search(query=q, indices=e_index, doc_types="dtc").total
def get_pubs():
    """Fetch expired 'immo' docs having no child 'opinion' doc of 'like'.

    Uses the module-level ``conn`` and ``e_index``.
    """
    expired = TermQuery('expired', 'true')
    no_likes = NotFilter(
        HasChildFilter('opinion', TermQuery('opinion', 'like')))
    return conn.search(query=FilteredQuery(expired, no_likes),
                       indices=e_index, doc_types="immo")
def test_Update(self):
    """reindex() driven by a term query should touch both 'joe' docs."""
    from pprint import pprint
    query = TermQuery("name", "joe")
    outcome = self.conn.reindex(query=query, indexes=["test-index"])
    pprint(outcome)
    self.assertEquals(outcome['hits']['total'], 2)
def test_TermQuery(self):
    """The single 'joe' hit carries the expected 'inserted' datetime."""
    resultset = self.conn.search(query=TermQuery("name", "joe"),
                                 indices=self.index_name)
    self.assertEquals(resultset.total, 1)
    first_hit = resultset[0]
    self.assertEquals(first_hit.inserted,
                      datetime(2010, 10, 22, 12, 12, 12))
def test_TermQuery(self):
    """The single 'joe' hit's source carries the expected datetime."""
    response = self.conn.search(query=TermQuery("name", "joe"),
                                indexes=["test-index"])
    self.assertEquals(response['hits']['total'], 1)
    inserted = response['hits']['hits'][0]['_source']['inserted']
    self.assertEquals(inserted, datetime(2010, 10, 22, 12, 12, 12))
def import_instruments(instrs, es_url, index, alias):
    """Create JSON ES docs and import.

    For each EOS instrument record in ``instrs``, builds a PROV-ES 'entity'
    document (plus an 'agent' doc per instrument agency, deduplicated) and
    indexes it into ``index``, skipping ids already present in ``alias``.
    Assumes each record has an 'Instrument Name Short' key; other keys are
    optional and empty-checked via the module-level EMPTY regex.
    """
    prefix = {
        "bibo": "http://purl.org/ontology/bibo/",
        "dcterms": "http://purl.org/dc/terms/",
        "eos": "http://nasa.gov/eos.owl#",
        "gcis": "http://data.globalchange.gov/gcis.owl#",
        "hysds": "http://hysds.jpl.nasa.gov/hysds/0.1#",
        "info": "http://info-uri.info/",
        "xlink": "http://www.w3.org/1999/xlink"
    }
    conn = ES(es_url)
    if not conn.indices.exists_index(index):
        conn.indices.create_index(index)

    # track agencies/organizations
    orgs = {}
    for instr in instrs:
        identifier = "eos:%s" % instr['Instrument Name Short']
        # NOTE(review): `id` (md5 of identifier) is computed but never used
        # below — and shadows the builtin. Left as-is in this doc pass.
        id = hashlib.md5(identifier).hexdigest()
        # Pick the most specific non-empty classification field as the
        # sensor, in priority order: Instrument Technology > Instrument
        # Type > Subtype > Type > Class; None if all are absent/empty.
        if 'Instrument Technology' in instr and not EMPTY.search(
                instr['Instrument Technology']):
            sensor = "eos:%s" % instr['Instrument Technology']
        else:
            if 'Instrument Type' in instr and not EMPTY.search(
                    instr['Instrument Type']):
                sensor = "eos:%s" % instr['Instrument Type']
            else:
                if 'Subtype' in instr and not EMPTY.search(instr['Subtype']):
                    sensor = "eos:%s" % instr['Subtype']
                else:
                    if 'Type' in instr and not EMPTY.search(instr['Type']):
                        sensor = "eos:%s" % instr['Type']
                    else:
                        if 'Class' in instr and not EMPTY.search(
                                instr['Class']):
                            sensor = "eos:%s" % instr['Class']
                        else:
                            sensor = None
        #print(instr['Instrument Technology'], sensor)
        platform = None
        if 'Instrument Agencies' in instr and not EMPTY.search(
                instr['Instrument Agencies']):
            org = "eos:%s" % instr['Instrument Agencies']
            if org not in orgs:
                # Minimal PROV 'agent' doc for this organization.
                orgs[org] = {
                    "prov_es_json": {
                        "prefix": prefix,
                        "agent": {
                            org: {
                                "prov:type": {
                                    "type": "prov:QualifiedName",
                                    "$": "prov:Organization",
                                },
                            },
                        },
                    },
                    "identifier": org,
                    "prov:type": "prov:Organization",
                }
            # Only index the agent if it is not already in the alias.
            if len(
                    conn.search(query=TermQuery("_id", org),
                                indices=[alias])) > 0:
                pass
            else:
                conn.index(orgs[org], index, 'agent', org)
        else:
            org = None
        # Instrument 'entity' doc: flattened fields plus an embedded
        # PROV-ES snapshot keyed by the identifier.
        doc = {
            "prov_es_json": {
                "prefix": prefix,
                "entity": {
                    identifier: {
                        "gcis:hasSensor": sensor,
                        "gcis:inPlatform": platform,
                        "prov:type": "eos:instrument",
                        "gcis:hasGoverningOrganization": org,
                    },
                },
            },
            "gcis:hasSensor": sensor,
            "gcis:inPlatform": platform,
            "prov:type": "eos:instrument",
            "gcis:hasGoverningOrganization": org,
            "identifier": identifier,
        }
        # Skip instruments already indexed under the alias.
        if len(
                conn.search(query=TermQuery("_id", identifier),
                            indices=[alias])) > 0:
            pass
        else:
            conn.index(doc, index, 'entity', identifier)
def test_TermQuery(self):
    """A term query for 'bill' should match both Bill documents."""
    response = self.conn.search(query=TermQuery("name", "bill"),
                                indexes=["test-index"])
    self.assertEquals(response['hits']['total'], 2)
def test_TermQuery(self):
    """A term query for 'bill' should match both Bill documents."""
    hits = self.conn.search(query=TermQuery("name", "bill"),
                            indices=self.index_name)
    self.assertEquals(hits.total, 2)