def _es_field_block(self, conn, type, now, max_wait=False):
    """Poll the index until this record's ``last_updated`` equals ``now``.

    Repeatedly searches for the record whose ``id.exact`` term matches
    ``self.id`` and returns as soon as the stored ``last_updated`` value
    equals ``now``, or once the accumulated sleep time reaches ``max_wait``.

    :param conn: connection object handed straight to ``raw.search``
    :param type: type handed straight to ``raw.search``
    :param now: the ``last_updated`` value to wait for
    :param max_wait: upper bound, in seconds, on the total time spent
        sleeping between polls; ``False`` (the default) means no bound.
        Only the sleeps are counted, not the time taken by the searches.
    :raises StoreException: if the search returns more than one record
    """
    poll_interval = 0.5
    lookup = {
        "query": {"term": {"id.exact": self.id}},
        "fields": ["last_updated"],
    }

    slept = 0.0
    # keep polling while there is no wait budget, or the budget is not yet spent
    while max_wait is False or not (slept >= max_wait):
        hits = raw.unpack_result(raw.search(conn, type, lookup))
        found = len(hits)

        if found > 1:
            raise StoreException("More than one record with id {x}".format(x=self.id))

        # NOTE: only works on ES > 1.x
        if found == 1 and hits[0].get("last_updated")[0] == now:
            return

        # either nothing is visible yet, or the visible copy is stale
        time.sleep(poll_interval)
        slept += poll_interval
def _es_field_block(self, conn, type, now, max_wait=False):
    """Block until the stored ``last_updated`` for this record matches ``now``.

    Issues a term query on ``id.exact`` for ``self.id`` (requesting only the
    ``last_updated`` field) every half second. Returns when the single
    matching record carries ``last_updated`` equal to ``now``, or when the
    time spent sleeping has reached ``max_wait``.

    :param conn: connection object forwarded to ``raw.search``
    :param type: type forwarded to ``raw.search``
    :param now: timestamp value the stored ``last_updated`` must equal
    :param max_wait: maximum total sleep time in seconds, or ``False`` (the
        default) to wait without limit
    :raises StoreException: if more than one record matches the id
    """
    step = 0.5
    id_query = {
        "query": {
            "term": {"id.exact": self.id}
        },
        "fields": ["last_updated"]
    }

    def _budget_spent(elapsed):
        # a max_wait of False disables the time limit altogether
        return max_wait is not False and elapsed >= max_wait

    total_slept = 0.0
    while not _budget_spent(total_slept):
        response = raw.search(conn, type, id_query)
        records = raw.unpack_result(response)

        if len(records) > 1:
            raise StoreException(
                "More than one record with id {x}".format(x=self.id))

        if len(records) == 1:
            # NOTE: only works on ES > 1.x
            if records[0].get("last_updated")[0] == now:
                break

        # record not visible yet, or not yet showing the expected timestamp
        time.sleep(step)
        total_slept += step
def scroll(conn, type, q=None, page_size=1000, limit=None, keepalive="1m", scan=False):
    """Generate the records matched by a query, one page of a scroll at a time.

    The query is copied before use, so the caller's dict is not modified at
    the top level. When no query is supplied a ``match_all`` query is used,
    and ``size`` is set to ``page_size`` unless the query already has one.

    :param conn: connection object forwarded to the ``raw`` helpers
    :param type: type forwarded to ``raw.initialise_scroll``
    :param q: query dict, or ``None`` for ``match_all``
    :param page_size: value used for ``size`` when the query does not set it
    :param limit: maximum number of records to yield (anything ``int()``
        accepts), or ``None`` for no limit
    :param keepalive: keepalive value forwarded to ``raw.initialise_scroll``
        and ``raw.scroll_next``
    :param scan: forwarded unchanged to ``raw.initialise_scroll``
    :raises ScrollInitialiseException: if the initialising response does not
        have status code 200
    :raises ScrollTimeoutException: if ``raw.scroll_timedout`` reports that a
        follow-up page request timed out
    """
    q = {"query": {"match_all": {}}} if q is None else q.copy()
    if "size" not in q:
        q["size"] = page_size

    resp = raw.initialise_scroll(conn, type, q, keepalive, scan)
    if resp.status_code != 200:
        # something went wrong initialising the scroll
        raise ScrollInitialiseException("Unable to initialise scroll - could be your mappings are broken")

    # otherwise, carry on
    results, scroll_id = raw.unpack_scroll(resp)
    total_results = raw.total_results(resp)

    # convert the limit once rather than on every record
    max_results = None if limit is None else int(limit)

    counter = 0
    while True:
        for r in results:
            # apply the limit
            if max_results is not None and counter >= max_results:
                return
            counter += 1
            yield r

        # apply the limit before asking for another page
        if max_results is not None and counter >= max_results:
            return

        # if we consumed all the results we were expecting, we can just stop here
        if counter >= total_results:
            return

        # get the next page and check that we haven't timed out
        sresp = raw.scroll_next(conn, scroll_id, keepalive=keepalive)
        if raw.scroll_timedout(sresp):
            # the placeholders are named, so they must be filled by keyword;
            # positional arguments here would raise KeyError and mask the
            # timeout from callers catching ScrollTimeoutException
            ex = "Scroll timed out; {status} - {message}".format(
                status=sresp.status_code, message=sresp.text)
            raise ScrollTimeoutException(ex)

        # if we didn't get any results back, this also means we're at the end
        results = raw.unpack_result(sresp)
        if len(results) == 0:
            return
def save(self, conn=None, makeid=True, created=True, updated=True, blocking=False, type=None):
    """Store this record, optionally waiting until the write is searchable.

    :param conn: connection to use; defaults to ``self.__conn__``
    :param makeid: if true and ``self.data`` has no ``id``, assign one from
        ``self.makeid()``
    :param created: if true and ``self.data`` has no ``created_date``, set it
        to the current timestamp
    :param updated: if true, set ``last_updated`` to the current timestamp
    :param blocking: if true, poll after storing until a search for this id
        returns a single record whose ``last_updated`` equals the timestamp
        just written. There is no timeout on this wait.
    :param type: passed through ``self.get_write_type`` to pick the type
        written to
    :raises StoreException: if ``blocking`` is requested without ``updated``,
        or if the blocking search finds more than one record with this id
    """
    conn = self.__conn__ if conn is None else conn
    type = self.get_write_type(type)

    if blocking and not updated:
        raise StoreException("Unable to do blocking save on record where last_updated is not set")

    now = util.now()
    # a blocking save detects completion by comparing timestamps, so the new
    # last_updated must differ from the one already on the record
    if blocking and now == self.last_updated:
        time.sleep(1)       # timestamp granularity is seconds, so just sleep for 1
        now = util.now()    # update the new timestamp

    # the main body of the save
    if makeid and "id" not in self.data:
        self.id = self.makeid()
    if created and 'created_date' not in self.data:
        self.data['created_date'] = now
    if updated:
        self.data['last_updated'] = now

    raw.store(conn, type, self.data, self.id)

    if not blocking:
        return

    lookup = {
        "query": {"term": {"id.exact": self.id}},
        "fields": ["last_updated"],
    }
    while True:
        matches = raw.unpack_result(raw.search(conn, type, lookup))
        found = len(matches)

        if found > 1:
            raise StoreException("More than one record with id {x}".format(x=self.id))

        # NOTE: only works on ES > 1.x
        if found == 1 and matches[0].get("last_updated")[0] == now:
            return

        # not visible yet, or still showing the previous timestamp
        time.sleep(0.5)
def scroll(conn, type, q=None, page_size=1000, limit=None, keepalive="1m", keyword_subfield="exact"):
    """Generate the records matched by a query by paging through a scroll.

    Works on a copy of ``q`` (or a ``match_all`` query when ``q`` is
    ``None``), filling in ``size`` and ``sort`` only when the query does not
    already specify them.

    :param conn: connection object forwarded to the ``raw`` helpers
    :param type: type forwarded to ``raw.initialise_scroll``
    :param q: query dict, or ``None`` for ``match_all``
    :param page_size: value used for ``size`` when the query does not set it
    :param limit: maximum number of records to yield, or ``None`` for no limit
    :param keepalive: keepalive value forwarded to ``raw.initialise_scroll``
        and ``raw.scroll_next``
    :param keyword_subfield: suffix appended to ``"id."`` to name the default
        sort field
    :raises ScrollException: if the initialising response does not have status
        code 200, or if ``raw.scroll_timedout`` reports a timed-out page
    """
    query = {"query" : {"match_all" : {}}} if q is None else q.copy()
    if "size" not in query:
        query["size"] = page_size
    if "sort" not in query:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        query["sort"] = [{"id." + keyword_subfield : {"order" : "asc"}}]

    first = raw.initialise_scroll(conn, type, query, keepalive)
    if first.status_code != 200:
        # something went wrong initialising the scroll
        raise ScrollException("Unable to initialise scroll - could be your mappings are broken")

    # otherwise, carry on
    page, scroll_id = raw.unpack_scroll(first)
    cap = None if limit is None else int(limit)

    emitted = 0
    while True:
        for record in page:
            # apply the limit
            if cap is not None and emitted >= cap:
                return
            emitted += 1
            yield record

        # apply the limit before requesting another page
        if cap is not None and emitted >= cap:
            return

        following = raw.scroll_next(conn, scroll_id, keepalive=keepalive)
        if raw.scroll_timedout(following):
            raise ScrollException("scroll timed out - you probably need to raise the keepalive value")

        page = raw.unpack_result(following)
        if len(page) == 0:
            # an empty page means the scroll is exhausted
            return
def scroll(conn, type, q=None, page_size=1000, limit=None, keepalive="10m"):
    """Generate the records matched by a query by paging through a scroll.

    The supplied query is copied first; ``None`` is replaced by a
    ``match_all`` query. ``size`` defaults to ``page_size`` and ``sort``
    defaults to ascending ``id``, each only when the query lacks that key.

    :param conn: connection object forwarded to the ``raw`` helpers
    :param type: type forwarded to ``raw.initialise_scroll``
    :param q: query dict, or ``None`` for ``match_all``
    :param page_size: value used for ``size`` when the query does not set it
    :param limit: maximum number of records to yield, or ``None`` for no limit
    :param keepalive: keepalive value forwarded to ``raw.initialise_scroll``
        and ``raw.scroll_next``
    :raises ScrollException: if the initialising response does not have status
        code 200, or if ``raw.scroll_timedout`` reports a timed-out page
    """
    if q is None:
        body = {"query" : {"match_all" : {}}}
    else:
        body = q.copy()

    if "size" not in body:
        body["size"] = page_size
    if "sort" not in body:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        body["sort"] = [{"id" : {"order" : "asc"}}]

    opening = raw.initialise_scroll(conn, type, body, keepalive)
    if opening.status_code != 200:
        # something went wrong initialising the scroll
        raise ScrollException("Unable to initialise scroll - could be your mappings are broken")

    # otherwise, carry on
    batch, scroll_id = raw.unpack_scroll(opening)
    ceiling = None if limit is None else int(limit)

    def _limit_reached(count):
        # with no limit configured the generator runs until the scroll is empty
        return ceiling is not None and count >= ceiling

    served = 0
    while True:
        for item in batch:
            if _limit_reached(served):
                return
            served += 1
            yield item

        if _limit_reached(served):
            return

        nxt = raw.scroll_next(conn, scroll_id, keepalive=keepalive)
        if raw.scroll_timedout(nxt):
            raise ScrollException("scroll timed out - you probably need to raise the keepalive value")

        batch = raw.unpack_result(nxt)
        if len(batch) == 0:
            # nothing more to read from the scroll
            return
def iterate(conn, type, q, page_size=1000, limit=None, method="POST", keyword_subfield="exact"):
    """Generate the records matched by a query using ``from``/``size`` paging.

    Works on a copy of ``q``. ``size`` and ``from`` are always overwritten;
    ``sort`` is only added when the query does not already have one.

    :param conn: connection object forwarded to ``raw.search``
    :param type: type forwarded to ``raw.search``
    :param q: query dict (required)
    :param page_size: number of records requested per search, and the amount
        ``from`` advances after each page
    :param limit: maximum number of records to yield, or ``None`` for no limit
    :param method: forwarded to ``raw.search`` as ``method``
    :param keyword_subfield: suffix appended to ``"id."`` to name the default
        sort field
    """
    query = q.copy()
    query.update({"size": page_size, "from": 0})
    if "sort" not in query:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        query["sort"] = [{"id." + keyword_subfield : {"order" : "asc"}}]

    produced = 0
    # the limit is re-evaluated before each search, exactly as before each record
    while limit is None or produced < int(limit):
        response = raw.search(conn, type=type, query=query, method=method)
        page = raw.unpack_result(response)
        if len(page) == 0:
            # an empty page means there is nothing further to read
            return

        for record in page:
            # apply the limit (again)
            if limit is not None and produced >= int(limit):
                break
            produced += 1
            yield record

        # move the window on to the next page
        query["from"] = query["from"] + page_size
def iterate(conn, type, q, page_size=1000, limit=None, method="POST"):
    """Generate the records matched by a query using ``from``/``size`` paging.

    A copy of ``q`` is used for the searches. Its ``size`` and ``from`` keys
    are always overwritten, and a default ascending sort on ``id`` is added
    when the query has no ``sort`` of its own.

    :param conn: connection object forwarded to ``raw.search``
    :param type: type forwarded to ``raw.search``
    :param q: query dict (required)
    :param page_size: number of records requested per search, and the amount
        ``from`` advances after each page
    :param limit: maximum number of records to yield, or ``None`` for no limit
    :param method: forwarded to ``raw.search`` as ``method``
    """
    paged = q.copy()
    paged["size"] = page_size
    paged["from"] = 0
    if "sort" not in paged:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        paged["sort"] = [{"id" : {"order" : "asc"}}]

    def _exhausted(count):
        # no limit means keep going until a search comes back empty
        return limit is not None and count >= int(limit)

    yielded = 0
    while not _exhausted(yielded):
        hits = raw.unpack_result(
            raw.search(conn, type=type, query=paged, method=method)
        )
        if len(hits) == 0:
            break

        for hit in hits:
            # apply the limit (again)
            if _exhausted(yielded):
                break
            yielded += 1
            yield hit

        # advance the offset for the next search
        paged["from"] += page_size
def save(self, conn=None, makeid=True, created=True, updated=True, blocking=False, type=None):
    """Store this record, optionally waiting until the write is searchable.

    :param conn: connection to use; defaults to ``self._get_connection()``
    :param makeid: if true and ``self.data`` has no ``id``, assign one from
        ``self.makeid()``
    :param created: if true and ``self.data`` has no ``created_date``, set it
        to the current timestamp
    :param updated: if true, set ``last_updated`` to the current timestamp
    :param blocking: if true, poll after storing until a search for this id
        returns a single record whose ``last_updated`` equals the timestamp
        just written. There is no timeout on this wait.
    :param type: type to write to; defaults to ``self._get_write_type()``
    :raises StoreException: if ``blocking`` is requested without ``updated``,
        or if the blocking search finds more than one record with this id
    """
    if conn is None:
        conn = self._get_connection()
    if type is None:
        type = self._get_write_type()

    if blocking and not updated:
        raise StoreException(
            "Unable to do blocking save on record where last_updated is not set"
        )

    stamp = util.now()
    # the blocking check compares timestamps, so the new last_updated has to
    # differ from the value the record already carries
    if blocking and stamp == self.last_updated:
        time.sleep(1)         # timestamp granularity is seconds, so just sleep for 1
        stamp = util.now()    # update the new timestamp

    # the main body of the save
    if makeid and "id" not in self.data:
        self.id = self.makeid()
    if created and 'created_date' not in self.data:
        self.data['created_date'] = stamp
    if updated:
        self.data['last_updated'] = stamp

    raw.store(conn, type, self.data, self.id)

    if not blocking:
        return

    id_query = {
        "query": {"term": {"id.exact": self.id}},
        "fields": ["last_updated"],
    }
    confirmed = False
    while not confirmed:
        records = raw.unpack_result(raw.search(conn, type, id_query))

        if len(records) > 1:
            raise StoreException(
                "More than one record with id {x}".format(x=self.id))

        # NOTE: only works on ES > 1.x
        confirmed = len(records) == 1 and records[0].get("last_updated")[0] == stamp
        if not confirmed:
            # not visible yet, or still showing the previous timestamp
            time.sleep(0.5)