Beispiel #1
0
 def _es_field_block(self, conn, type, now, max_wait=False):
     """
     Block until the indexed copy of this record reports ``last_updated == now``.

     Repeatedly searches for the record by ``id.exact`` and compares the first
     value of its returned ``last_updated`` field with ``now``, sleeping half a
     second between attempts.

     :param conn: connection object handed through to ``raw.search``
     :param type: index type handed through to ``raw.search``
     :param now: the timestamp value the stored record is expected to carry
     :param max_wait: ``False`` to poll without a time limit; otherwise the
         amount of accumulated sleep time (in seconds) after which polling
         stops silently
     :raises StoreException: if the search returns more than one record
     :return: None, both when the timestamp is seen and when ``max_wait`` expires
     """
     query = {
         "query": {"term": {"id.exact": self.id}},
         "fields": ["last_updated"]
     }
     poll_interval = 0.5
     elapsed = 0.0
     while True:
         # the wait budget only counts time spent sleeping, not time spent searching
         if max_wait is not False and elapsed >= max_wait:
             return

         hits = raw.unpack_result(raw.search(conn, type, query))
         found = len(hits)
         if found > 1:
             raise StoreException("More than one record with id {x}".format(x=self.id))

         # NOTE: only works on ES > 1.x
         if found == 1 and hits[0].get("last_updated")[0] == now:
             return

         # either not indexed yet, or still showing the old timestamp
         time.sleep(poll_interval)
         elapsed += poll_interval
Beispiel #2
0
 def _es_field_block(self, conn, type, now, max_wait=False):
     """
     Wait for the index to reflect a save of this record made at ``now``.

     The record is looked up by a term query on ``id.exact``, requesting only
     the ``last_updated`` field.  The loop ends as soon as the first value of
     that field equals ``now``; between attempts it sleeps for 0.5 seconds.

     :param conn: connection passed straight to ``raw.search``
     :param type: type passed straight to ``raw.search``
     :param now: timestamp to wait for
     :param max_wait: ``False`` (the default) means wait for as long as it
         takes; any other value is the total sleep time, in seconds, after
         which the method stops waiting without raising
     :raises StoreException: when more than one record matches the id
     """
     lookup = {
         "query": {
             "term": {
                 "id.exact": self.id
             }
         },
         "fields": ["last_updated"]
     }
     step = 0.5
     slept = 0.0
     while not (max_wait is not False and slept >= max_wait):
         matches = raw.unpack_result(raw.search(conn, type, lookup))
         if len(matches) > 1:
             raise StoreException(
                 "More than one record with id {x}".format(x=self.id))
         if len(matches) != 0:
             # NOTE: only works on ES > 1.x
             stamps = matches[0].get("last_updated")
             if stamps[0] == now:
                 break
         # nothing indexed yet, or the indexed copy is stale: back off and retry
         time.sleep(step)
         slept += step
Beispiel #3
0
def scroll(conn, type, q=None, page_size=1000, limit=None, keepalive="1m", scan=False):
    """
    Generator which yields every result of a query by paging through a scroll.

    :param conn: connection passed to the ``raw`` scroll functions
    :param type: type passed to ``raw.initialise_scroll``
    :param q: query dict; a shallow copy is taken so the caller's dict does not
        gain a "size" key.  Defaults to a match_all query
    :param page_size: value for the query's "size" when the query does not set one
    :param limit: maximum number of results to yield (anything ``int()``
        accepts), or None for no limit
    :param keepalive: keepalive value passed to the scroll initialise/next calls
    :param scan: passed through to ``raw.initialise_scroll``
    :raises ScrollInitialiseException: if the initial request does not return 200
    :raises ScrollTimeoutException: if ``raw.scroll_timedout`` reports that a
        follow-up request timed out
    """
    if q is not None:
        q = q.copy()
    else:
        q = {"query": {"match_all": {}}}
    if "size" not in q:
        q["size"] = page_size

    resp = raw.initialise_scroll(conn, type, q, keepalive, scan)
    if resp.status_code != 200:
        # something went wrong initialising the scroll
        raise ScrollInitialiseException("Unable to initialise scroll - could be your mappings are broken")

    # otherwise, carry on
    results, scroll_id = raw.unpack_scroll(resp)
    total_results = raw.total_results(resp)

    counter = 0
    for r in results:
        # apply the limit
        if limit is not None and counter >= int(limit):
            break
        counter += 1
        yield r

    while True:
        # apply the limit
        if limit is not None and counter >= int(limit):
            break

        # if we consumed all the results we were expecting, we can just stop here
        if counter >= total_results:
            break

        # get the next page and check that we haven't timed out
        sresp = raw.scroll_next(conn, scroll_id, keepalive=keepalive)
        if raw.scroll_timedout(sresp):
            status = sresp.status_code
            message = sresp.text
            # the template uses named fields, so the values must be passed by
            # keyword; passing them positionally raises KeyError instead of
            # producing the message
            ex = "Scroll timed out; {status} - {message}".format(status=status, message=message)
            raise ScrollTimeoutException(ex)

        # if we didn't get any results back, this also means we're at the end
        results = raw.unpack_result(sresp)
        if len(results) == 0:
            break

        for r in results:
            # apply the limit (again)
            if limit is not None and counter >= int(limit):
                break
            counter += 1
            yield r
Beispiel #4
0
    def save(self, conn=None, makeid=True, created=True, updated=True, blocking=False, type=None):
        """
        Store this object's data, optionally waiting until the index reflects it.

        :param conn: connection to use; falls back to ``self.__conn__`` when None
        :param makeid: when True, assign ``self.makeid()`` as the id if the data has no "id"
        :param created: when True, set "created_date" if the data does not have one
        :param updated: when True, set "last_updated" to the current timestamp
        :param blocking: when True, poll the index after storing until the
            record's "last_updated" equals the timestamp just written.  There
            is no time limit on this wait
        :param type: passed through ``self.get_write_type`` to obtain the type to write to
        :raises StoreException: if ``blocking`` is requested without ``updated``,
            or if the blocking lookup finds more than one record with this id
        """
        if conn is None:
            conn = self.__conn__
        type = self.get_write_type(type)

        # a blocking save recognises its own write by the last_updated value
        if blocking and not updated:
            raise StoreException("Unable to do blocking save on record where last_updated is not set")

        now = util.now()
        if blocking:
            # we need the new last_updated time to be later than the old one
            if now == self.last_updated:
                # timestamp granularity is seconds, so just sleep for 1
                time.sleep(1)
            # update the new timestamp
            now = util.now()

        # the main body of the save
        if makeid and "id" not in self.data:
            self.id = self.makeid()
        if created and 'created_date' not in self.data:
            self.data['created_date'] = now
        if updated:
            self.data['last_updated'] = now

        raw.store(conn, type, self.data, self.id)

        if not blocking:
            return

        lookup = {
            "query" : {
                "term" : {"id.exact" : self.id}
            },
            "fields" : ["last_updated"]
        }
        while True:
            hits = raw.unpack_result(raw.search(conn, type, lookup))
            found = len(hits)
            if found > 1:
                raise StoreException("More than one record with id {x}".format(x=self.id))
            # NOTE: only works on ES > 1.x
            if found == 1 and hits[0].get("last_updated")[0] == now:
                break
            # not visible yet, or still the previous version: wait and retry
            time.sleep(0.5)
Beispiel #5
0
def scroll(conn, type, q=None, page_size=1000, limit=None, keepalive="1m", keyword_subfield="exact"):
    """
    Generator which yields the results of a query one at a time via a scroll.

    :param conn: connection passed to the ``raw`` scroll functions
    :param type: type passed to ``raw.initialise_scroll``
    :param q: query dict (shallow-copied before "size"/"sort" are added);
        defaults to a match_all query
    :param page_size: "size" to use when the query does not specify one
    :param limit: maximum number of results to yield (anything ``int()``
        accepts), or None for no limit
    :param keepalive: keepalive value passed to the scroll initialise/next calls
    :param keyword_subfield: sub-field of "id" used for the default sort
    :raises ScrollException: if the scroll cannot be initialised, or if
        ``raw.scroll_timedout`` reports a timeout on a later page
    """
    query = {"query" : {"match_all" : {}}} if q is None else q.copy()
    if "size" not in query:
        query["size"] = page_size
    if "sort" not in query:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        query["sort"] = [{"id." + keyword_subfield : {"order" : "asc"}}]

    resp = raw.initialise_scroll(conn, type, query, keepalive)
    if resp.status_code != 200:
        # something went wrong initialising the scroll
        raise ScrollException("Unable to initialise scroll - could be your mappings are broken")

    def limit_reached(count):
        return limit is not None and count >= int(limit)

    page, scroll_id = raw.unpack_scroll(resp)
    emitted = 0
    while True:
        for record in page:
            if limit_reached(emitted):
                break
            emitted += 1
            yield record

        # checked again before asking for another page, so that no request is
        # made once the limit has been satisfied
        if limit_reached(emitted):
            break

        sresp = raw.scroll_next(conn, scroll_id, keepalive=keepalive)
        if raw.scroll_timedout(sresp):
            raise ScrollException("scroll timed out - you probably need to raise the keepalive value")
        page = raw.unpack_result(sresp)

        # an empty page marks the end of the scroll
        if len(page) == 0:
            break
Beispiel #6
0
def scroll(conn, type, q=None, page_size=1000, limit=None, keepalive="10m"):
    """
    Generator over all the results of a query, fetched page by page with a scroll.

    :param conn: connection passed to the ``raw`` scroll functions
    :param type: type passed to ``raw.initialise_scroll``
    :param q: query dict (a shallow copy is modified, not the caller's dict);
        defaults to a match_all query
    :param page_size: "size" applied when the query has none
    :param limit: maximum number of results to yield (anything ``int()``
        accepts), or None for no limit
    :param keepalive: keepalive value passed to the scroll initialise/next calls
    :raises ScrollException: if initialising the scroll does not return 200,
        or if ``raw.scroll_timedout`` reports a timeout while paging
    """
    if q is None:
        q = {"query" : {"match_all" : {}}}
    else:
        q = q.copy()
    if "size" not in q:
        q["size"] = page_size
    if "sort" not in q:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        q["sort"] = [{"id" : {"order" : "asc"}}]

    resp = raw.initialise_scroll(conn, type, q, keepalive)
    if resp.status_code != 200:
        # something went wrong initialising the scroll
        raise ScrollException("Unable to initialise scroll - could be your mappings are broken")

    first_page, scroll_id = raw.unpack_scroll(resp)

    def pages():
        # lazily produce each page; a follow-up request is only issued when the
        # consumer asks for the page after the one it has finished with
        yield first_page
        while True:
            sresp = raw.scroll_next(conn, scroll_id, keepalive=keepalive)
            if raw.scroll_timedout(sresp):
                raise ScrollException("scroll timed out - you probably need to raise the keepalive value")
            batch = raw.unpack_result(sresp)
            if len(batch) == 0:
                return
            yield batch

    delivered = 0
    for batch in pages():
        for item in batch:
            # apply the limit
            if limit is not None and delivered >= int(limit):
                break
            delivered += 1
            yield item
        # apply the limit again before another page is requested
        if limit is not None and delivered >= int(limit):
            break
Beispiel #7
0
def iterate(conn, type, q, page_size=1000, limit=None, method="POST", keyword_subfield="exact"):
    """
    Generator which yields the results of a query using from/size paging.

    :param conn: connection passed to ``raw.search``
    :param type: type passed to ``raw.search``
    :param q: query dict; a shallow copy is taken and its "size" and "from"
        are overwritten
    :param page_size: number of results requested per page
    :param limit: maximum number of results to yield (anything ``int()``
        accepts), or None for no limit
    :param method: HTTP method name passed to ``raw.search``
    :param keyword_subfield: sub-field of "id" used for the default sort
    """
    query = q.copy()
    query["size"] = page_size
    query["from"] = 0
    if "sort" not in query:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        query["sort"] = [{"id." + keyword_subfield : {"order" : "asc"}}]

    def limit_reached(count):
        return limit is not None and count >= int(limit)

    yielded = 0
    while not limit_reached(yielded):
        response = raw.search(conn, type=type, query=query, method=method)
        page = raw.unpack_result(response)

        # an empty page means there is nothing further to read
        if len(page) == 0:
            break

        for record in page:
            if limit_reached(yielded):
                break
            yielded += 1
            yield record

        # move the window on to the next page
        query["from"] += page_size
Beispiel #8
0
def iterate(conn, type, q, page_size=1000, limit=None, method="POST"):
    """
    Generator which pages through a query's results with "from"/"size".

    :param conn: connection passed to ``raw.search``
    :param type: type passed to ``raw.search``
    :param q: query dict; it is shallow-copied, and the copy's "size" and
        "from" are always replaced
    :param page_size: number of results requested per page
    :param limit: maximum number of results to yield (anything ``int()``
        accepts), or None for no limit
    :param method: HTTP method name passed to ``raw.search``
    """
    paged = q.copy()
    paged["size"] = page_size
    paged["from"] = 0
    if "sort" not in paged:
        # to ensure complete coverage on a changing index, sort by id is our best bet
        paged["sort"] = [{"id" : {"order" : "asc"}}]

    seen = 0
    # keep requesting pages until the limit (if any) has been satisfied
    while limit is None or seen < int(limit):
        hits = raw.unpack_result(raw.search(conn, type=type, query=paged, method=method))
        if len(hits) == 0:
            # ran off the end of the result set
            return

        for hit in hits:
            # the limit may be reached part-way through a page
            if limit is not None and seen >= int(limit):
                break
            seen += 1
            yield hit

        paged["from"] += page_size
Beispiel #9
0
    def save(self,
             conn=None,
             makeid=True,
             created=True,
             updated=True,
             blocking=False,
             type=None):
        """
        Write this object's data to the store, optionally blocking until it is searchable.

        :param conn: connection to use; ``self._get_connection()`` is used when None
        :param makeid: when True, generate an id with ``self.makeid()`` if the data has no "id"
        :param created: when True, stamp "created_date" if it is not already present
        :param updated: when True, stamp "last_updated" with the current timestamp
        :param blocking: when True, keep searching for the record after the
            write until its "last_updated" matches the timestamp written.
            This wait has no upper bound
        :param type: type to write to; ``self._get_write_type()`` is used when None
        :raises StoreException: if ``blocking`` is set while ``updated`` is not,
            or if more than one record is found with this id while blocking
        """
        conn = self._get_connection() if conn is None else conn
        type = self._get_write_type() if type is None else type

        # blocking relies on spotting the freshly written last_updated value
        if blocking and not updated:
            raise StoreException(
                "Unable to do blocking save on record where last_updated is not set"
            )

        now = util.now()
        if blocking:
            # we need the new last_updated time to be later than the old one
            if now == self.last_updated:
                # timestamp granularity is seconds, so just sleep for 1
                time.sleep(1)
            # update the new timestamp
            now = util.now()

        # the main body of the save
        if makeid and "id" not in self.data:
            self.id = self.makeid()
        if created and 'created_date' not in self.data:
            self.data['created_date'] = now
        if updated:
            self.data['last_updated'] = now

        raw.store(conn, type, self.data, self.id)

        if not blocking:
            return

        check = {
            "query": {
                "term": {
                    "id.exact": self.id
                }
            },
            "fields": ["last_updated"]
        }
        confirmed = False
        while not confirmed:
            records = raw.unpack_result(raw.search(conn, type, check))
            if len(records) > 1:
                raise StoreException(
                    "More than one record with id {x}".format(x=self.id))
            if len(records) != 0:
                # NOTE: only works on ES > 1.x
                confirmed = records[0].get("last_updated")[0] == now
            if not confirmed:
                # the write is not visible yet: pause before looking again
                time.sleep(0.5)