Example #1
0
File: gme.py Project: ruo91/Hodor
def get_viable_bboxes(ctx, table_id, minrequiredqps, bbox, pkey):
    """Work out which sub-boxes of an area can viably be queried on GME.

    The area is split with ``bbox2quarters`` and every piece is probed with
    a one-feature request. A piece whose probe raises ``QueryTooExpensive``,
    or whose reported ``allowedQueriesPerSecond`` is below
    ``minrequiredqps``, is split again and its pieces are queued for
    probing. A piece whose probe returns at least one feature is kept;
    pieces that return no features are dropped.

    Parameters
    ----------
    ctx : Context
      A Click Context object.
    table_id : int
      The GME vector tableId to query.
    minrequiredqps : int
      The minimum QPS (Queries Per Second) required from GME for a query
      to be considered viable.
    bbox : list
      A bounding box in the traditional order of [minx, miny, maxx, maxy].
    pkey : string
      The primary key of the table being queried.

    Returns
    -------
    list
      The viable bounding boxes, shuffled into random order.
    """
    @retries(10, delay=0.25, backoff=0.25)
    def features_list(polygon, pkey):
        started = time.time()
        listing = ctx.service.tables().features().list(
            id=table_id, maxResults=1, select=pkey, intersects=polygon)
        result = listing.execute()

        # Obey GME's QPS limits: pad every successful probe out to at
        # least 1.3 seconds before handing the response back.
        time.sleep(max(0, 1.3 - (time.time() - started)))
        return result

    # Per the original author's note, this yields at least four boxes.
    pending = bbox2quarters(bbox)
    viable = []
    while pending:
        candidate = pending.pop(0)
        try:
            response = features_list(bbox2poly(*candidate), pkey)

            qps_reported = 'allowedQueriesPerSecond' in response
            if qps_reported and response[
                    'allowedQueriesPerSecond'] < minrequiredqps:
                raise QPSTooLow("Query too expensive.")

            feature_count = len(response['features'])
            if feature_count > 0:
                viable.append(candidate)
                progress = (len(viable), len(pending))
                ctx.log("%s viable bounding boxes, %s remaining to test" %
                        progress)

        except (QueryTooExpensive, QPSTooLow) as err:
            # Too costly at this size: quarter it and test the pieces later.
            ctx.vlog("%s got error '%s', splitting." % (candidate, err))
            pending.extend(bbox2quarters(candidate))

    # Shuffle to distribute the expensive queries across the threads
    random.shuffle(viable)
    return viable
Example #2
0
File: gme.py Project: ruo91/Hodor
def get_viable_bboxes(ctx, table_id, minrequiredqps, bbox, pkey):
  """Calculate the bounding boxes within a given area that it is viable
  to query given GME's known limits.

  Each candidate box is probed with a request for a single feature. Boxes
  that raise QueryTooExpensive, or whose response reports an
  'allowedQueriesPerSecond' lower than ``minrequiredqps``, are split with
  bbox2quarters() and the pieces are appended to the work list. Boxes whose
  probe returns one or more features are collected as viable.

  Parameters
  ----------
  ctx : Context
    A Click Context object.
  table_id : int
    The GME vector tableId to query.
  minrequiredqps : int
    The minimum QPS (Queries Per Second) required from GME for a query to
    be considered viable.
  bbox : list
    A bounding box in the traditional order of [minx, miny, maxx, maxy].
  pkey : string
    The primary key of the table being queried.

  Returns
  -------
  list
    The viable bounding boxes in shuffled order.
  """
  @retries(10, delay=0.25, backoff=0.25)
  def features_list(polygon, pkey):
    t0 = time.time()
    features_api = ctx.service.tables().features()
    probe = features_api.list(id=table_id, maxResults=1, select=pkey, intersects=polygon)
    payload = probe.execute()

    # Obey GME's QPS limits by sleeping out the rest of a 1.3s window.
    spent = time.time() - t0
    time.sleep(max(0, 1.3 - spent))

    return payload

  # Split the input into at least four separate bounding boxes
  todo = bbox2quarters(bbox)
  keepers = []
  while todo:
    box = todo.pop(0)
    try:
      reply = features_list(bbox2poly(*box), pkey)

      if 'allowedQueriesPerSecond' in reply:
        if reply['allowedQueriesPerSecond'] < minrequiredqps:
          raise QPSTooLow("Query too expensive.")

      if len(reply['features']) > 0:
        keepers.append(box)
        ctx.log("%s viable bounding boxes, %s remaining to test" % (len(keepers), len(todo)))

    except (QueryTooExpensive, QPSTooLow) as problem:
      ctx.vlog("%s got error '%s', splitting." % (box, problem))
      todo.extend(bbox2quarters(box))

  # Shuffle to distribute the expensive queries across the threads
  random.shuffle(keepers)
  return keepers
Example #3
0
  def get(ctx, bboxes, where, table_id, feature_store, pkey, debug, debug_store):
    """Sub-process worker that downloads every feature for a chunk of
    bounding boxes.

    Boxes are taken from the front of ``bboxes`` and paged through 1000
    features at a time. When a BackendError interrupts a box, the features
    gathered for it so far are thrown away and the box is split with
    bbox2quarters(); the pieces are appended to ``bboxes``. Features of a
    fully-paged box are written to ``feature_store`` keyed by their primary
    key, skipping keys that are already present.

    ``features_list`` is not defined in this function; it is resolved from
    the surrounding scope.

    Parameters
    ----------
    ctx : Context
      A Click Context object.
    bboxes : list
      A list of lists of bounding boxes to query. Consumed (and possibly
      extended) in place.
    where : string
      A string in GME's SQL-like querying syntax.
    table_id : int
      The GME tableId to query.
    feature_store : Manager.dict()
      The master Manager().dict() object to store retrieved features to.
    pkey : string
      The name of the primary key column in the data source.
    debug : boolean
      Toggles debug mode to load httplib2 monkey patching to record request info.
    debug_store : Manager.list()
      The master Manager().list() object to store request details to for debugging.
    """

    if debug:
      import hodor.httplib2_patch

    # One authenticated service per worker process, created on first use.
    pid = multiprocessing.current_process().pid
    if pid not in ctx.thread_safe_services:
      ctx.log("## pid %s getting a new token... ##" % (pid))
      ctx.thread_safe_services[pid] = ctx.get_authenticated_service(ctx.RW_SCOPE)

    worker_started = time.time()
    while bboxes:
      collected = []

      bbox = bboxes.pop(0)
      resource = ctx.thread_safe_services[pid].tables().features()
      request = resource.list(
        id=table_id,
        maxResults=1000,
        intersects=bbox2poly(*bbox),
        where=where
      )

      pages = 0
      bbox_started = time.time()
      was_split = False
      while request is not None:
        try:
          pages += 1

          sent_at = time.time()
          if debug:
            # In debug mode the call yields the response headers as well.
            headers, response = features_list(request)
            took = time.time() - sent_at

            status = headers['status']
            date = headers['date']
            returned = len(response['features']) if status == "200" else 0
            debug_row = (
              status,
              date,
              returned,
              headers.get('x---stop-time') - sent_at,
              took,
              ', '.join(map(str, bbox)),
              pages,
              request.uri
            )
            debug_store.append(debug_row)
          else:
            response = features_list(request)
            took = time.time() - sent_at

          collected.extend(response['features'])

          # Obey GME's QPS limits
          time.sleep(max(0, 1 - took))

          request = resource.list_next(request, response)
        except BackendError as e:
          # For 'Deadline exceeded' errors
          details = (pid, e, ', '.join(map(str, bbox)), pages, time.time() - bbox_started, len(collected))
          ctx.log("pid %s got error '%s' for [%s] after %s pages and %ss. Discarded %s features. Splitting and trying again." %
                    details)

          request, collected, pages = None, [], 0
          bboxes.extend(bbox2quarters(bbox)) # Split and append to the end
          was_split = True
          break

      if not was_split:
        # Add new features to the master store
        for feature in collected:
          key = feature['properties'][pkey]
          if key not in feature_store:
            feature_store[key] = feature

        took_total = round(time.time() - bbox_started, 2)
        ctx.log("pid %s retrieved %s features from %s pages in %ss" % (pid, len(collected), pages, took_total))

    worker_elapsed = time.time() - worker_started
    ctx.log("pid %s finished chunk in %smins" % (pid, round(worker_elapsed / 60, 2)))
Example #4
0
  def get_all_features(ctx, bboxes, where, table_id, feature_store, pkey, debug, debug_store, qps, qps_share):
    """Sub-process to retrieve all of the features for a given chunk of
    bounding boxes.

    Boxes are taken from the front of ``bboxes`` and paged through 1000
    features at a time. If a BackendError is raised while paging a box, the
    features gathered for that box are discarded and the box is split with
    bbox2quarters(); the pieces are appended to ``bboxes``. Features of a
    fully-paged box are written to ``feature_store`` keyed by primary key,
    skipping keys that are already present.

    Parameters
    ----------
    ctx : Context
      A Click Context object.
    bboxes : list
      A list of lists of bounding boxes to query. Consumed (and possibly
      extended) in place.
    where : string
      A string in GME's SQL-like querying syntax.
    table_id : int
      The GME tableId to query.
    feature_store : Manager.dict()
      The master Manager().dict() object to store retrieved features to.
    pkey : string
      The name of the primary key column in the data source.
    debug : boolean
      Toggles debug mode to load httplib2 monkey patching to record request info.
    debug_store : Manager.list()
      The master Manager().list() object to store request details to for debugging.
    qps : int
      The allowed QPS. Refer to hodor.gme.obey_qps().
    qps_share : int
      Each thread's share of the QPS. Refer to hodor.gme.obey_qps().
    """
    # Identify this worker once. The value selects the per-process service
    # and tags every log line below; previously ``pid`` was read by the log
    # calls without ever being assigned in this function.
    pid = current_process().ident

    @obey_qps(qps=qps, share=qps_share)
    @retries(10, delay=0.25, backoff=0.25)
    def features_list(request, debug_store=None):
      if not debug:
        return request.execute()

      # Time the request here so the debug record has real values;
      # these two names were previously read without being assigned.
      request_start_time = time.time()
      # In debug mode execute() is unpacked into (headers, response),
      # presumably because of the httplib2 patch imported below.
      headers, response = request.execute()
      request_elapsed_time = time.time() - request_start_time

      # ``bbox`` and ``page_counter`` are read from the enclosing loop.
      debug_store.append((
        headers['status'],
        headers['date'],
        len(response['features']) if headers['status'] == "200" else 0,
        headers.get('x---stop-time') - request_start_time,
        (request_elapsed_time),
        ', '.join(str(v) for v in bbox),
        page_counter,
        request.uri
      ))
      return response

    if debug:
      import hodor.httplib2_patch

    thread_start_time = time.time()
    while bboxes:
      features = []

      bbox = bboxes.pop(0)
      resource = ctx.service(ident=pid).tables().features()
      request = resource.list(
        id=table_id, maxResults=1000,
        intersects=bbox2poly(*bbox),
        where=where
      )

      page_counter = 0
      resultset_start_time = time.time()
      # list_next() hands back None once the last page has been read.
      while request is not None:
        try:
          page_counter += 1

          if debug:
            response = features_list(request, debug_store)
          else:
            response = features_list(request)
          features += response['features']

          request = resource.list_next(request, response)
        except BackendError as e:
          # For 'Deadline exceeded' errors
          ctx.log("pid %s got error '%s' for [%s] after %s pages and %ss. Discarded %s features. Splitting and trying again." %
                    (pid, e, ', '.join(str(v) for v in bbox), page_counter, time.time() - resultset_start_time, len(features)))

          request = None
          features = []
          page_counter = 0
          bboxes.extend(bbox2quarters(bbox)) # Split and append to the end
          break
      else:
        # Only reached when paging finished without a break, i.e. the
        # whole box was retrieved. Add new features to the master store.
        for f in features:
          if f['properties'][pkey] not in feature_store:
            feature_store[f['properties'][pkey]] = f

        ctx.log("pid %s retrieved %s features from %s pages in %ss" % (pid, len(features), page_counter, round(time.time() - resultset_start_time, 2)))

    thread_elapsed_time = time.time() - thread_start_time
    ctx.log("pid %s finished chunk in %smins" % (pid, round(thread_elapsed_time / 60, 2)))
Example #5
0
    def get(ctx, bboxes, where, table_id, feature_store, pkey, debug,
            debug_store):
        """Sub-process to retrieve all of the features for a given chunk of
        bounding boxes.

        Every box popped from ``bboxes`` is paged through 1000 features at a
        time. A BackendError while paging discards what was gathered for
        that box, splits it with bbox2quarters() and appends the pieces to
        ``bboxes``. When a box is paged to the end, its features are added
        to ``feature_store`` under their primary key unless that key is
        already stored.

        ``features_list`` is not defined in this function; it is resolved
        from the surrounding scope.

        Parameters
        ----------
        ctx : Context
          A Click Context object.
        bboxes : list
          A list of lists of bounding boxes to query. Consumed (and possibly
          extended) in place.
        where : string
          A string in GME's SQL-like querying syntax.
        table_id : int
          The GME tableId to query.
        feature_store : Manager.dict()
          The master Manager().dict() object to store retrieved features to.
        pkey : string
          The name of the primary key column in the data source.
        debug : boolean
          Toggles debug mode to load httplib2 monkey patching to record
          request info.
        debug_store : Manager.list()
          The master Manager().list() object to store request details to for
          debugging.
        """

        if debug:
            import hodor.httplib2_patch

        # Each worker process gets its own authenticated service, created
        # the first time that process is seen.
        pid = multiprocessing.current_process().pid
        if pid not in ctx.thread_safe_services:
            ctx.log("## pid %s getting a new token... ##" % (pid))
            fresh_service = ctx.get_authenticated_service(ctx.RW_SCOPE)
            ctx.thread_safe_services[pid] = fresh_service

        chunk_started = time.time()
        while bboxes:
            gathered = []

            bbox = bboxes.pop(0)
            resource = ctx.thread_safe_services[pid].tables().features()
            polygon = bbox2poly(*bbox)
            request = resource.list(id=table_id, maxResults=1000,
                                    intersects=polygon, where=where)

            page_no = 0
            box_started = time.time()
            while request is not None:
                try:
                    page_no += 1

                    call_started = time.time()
                    if debug:
                        # Debug mode also yields the response headers.
                        headers, response = features_list(request)
                        call_elapsed = time.time() - call_started

                        status = headers['status']
                        date = headers['date']
                        if status == "200":
                            n_returned = len(response['features'])
                        else:
                            n_returned = 0
                        stop_offset = (headers.get('x---stop-time')
                                       - call_started)
                        box_text = ', '.join(str(v) for v in bbox)
                        debug_store.append(
                            (status, date, n_returned, stop_offset,
                             call_elapsed, box_text, page_no, request.uri))
                    else:
                        response = features_list(request)
                        call_elapsed = time.time() - call_started

                    gathered.extend(response['features'])

                    # Obey GME's QPS limits
                    time.sleep(max(0, 1 - call_elapsed))

                    request = resource.list_next(request, response)
                except BackendError as e:
                    # For 'Deadline exceeded' errors
                    box_text = ', '.join(str(v) for v in bbox)
                    ctx.log(
                        "pid %s got error '%s' for [%s] after %s pages and %ss. Discarded %s features. Splitting and trying again."
                        % (pid, e, box_text, page_no,
                           time.time() - box_started, len(gathered)))

                    request, gathered, page_no = None, [], 0
                    # Split and append to the end
                    bboxes.extend(bbox2quarters(bbox))
                    break
            else:
                # Paging ran to completion (no break): add new features to
                # the master store.
                for feature in gathered:
                    feature_key = feature['properties'][pkey]
                    if feature_key not in feature_store:
                        feature_store[feature_key] = feature

                box_elapsed = round(time.time() - box_started, 2)
                ctx.log("pid %s retrieved %s features from %s pages in %ss" %
                        (pid, len(gathered), page_no, box_elapsed))

        chunk_minutes = round((time.time() - chunk_started) / 60, 2)
        ctx.log("pid %s finished chunk in %smins" % (pid, chunk_minutes))