# Stdlib dependencies for this section. The hodor helpers used below
# (retries, obey_qps, bbox2poly, bbox2quarters and the QPSTooLow,
# QueryTooExpensive and BackendError exceptions) are assumed to be
# imported from elsewhere in the package.
import multiprocessing
import random
import time
from multiprocessing import current_process


def get_viable_bboxes(ctx, table_id, minrequiredqps, bbox, pkey):
    """Calculate the bounding boxes within a given area that are viable
    to query given GME's known limits.

    Parameters
    ----------
    ctx : Context
        A Click Context object.
    table_id : int
        The GME vector tableId to query.
    minrequiredqps : int
        The minimum QPS (Queries Per Second) required from GME for a query
        to be considered viable.
    bbox : list
        A bounding box in the traditional order of [minx, miny, maxx, maxy].
    pkey : string
        The primary key of the table being queried.
    """
    @retries(10, delay=0.25, backoff=0.25)
    def features_list(polygon, pkey):
        request_start_time = time.time()
        response = ctx.service.tables().features().list(
            id=table_id, maxResults=1, select=pkey,
            intersects=polygon).execute()

        # Obey GME's QPS limits
        request_elapsed_time = time.time() - request_start_time
        nap_time = max(0, 1.3 - request_elapsed_time)
        time.sleep(nap_time)

        return response

    # Split the input into at least four separate bounding boxes
    untestedbboxes = bbox2quarters(bbox)
    viablebboxes = []
    while untestedbboxes:
        try:
            bbox = untestedbboxes.pop(0)
            response = features_list(bbox2poly(*bbox), pkey)

            if ('allowedQueriesPerSecond' in response and
                    response['allowedQueriesPerSecond'] < minrequiredqps):
                raise QPSTooLow("Allowed QPS is below the required minimum.")

            if len(response['features']) > 0:
                viablebboxes.append(bbox)
                ctx.log("%s viable bounding boxes, %s remaining to test" %
                        (len(viablebboxes), len(untestedbboxes)))
        except (QueryTooExpensive, QPSTooLow) as e:
            # Too expensive to query at this size: quarter the bbox and requeue.
            ctx.vlog("%s got error '%s', splitting." % (bbox, e))
            untestedbboxes.extend(bbox2quarters(bbox))

    # Shuffle to distribute the expensive queries across the threads
    random.shuffle(viablebboxes)
    return viablebboxes
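
# A minimal usage sketch for get_viable_bboxes. The table id, QPS threshold,
# bounding box and primary key below are illustrative only, and
# _chunk_bboxes is a hypothetical helper (not part of hodor) for fanning
# the shuffled result out to worker processes:
#
#   viable = get_viable_bboxes(ctx, 12345, 2, [115.0, -35.0, 129.0, -13.0], 'gx_id')
#   chunks = _chunk_bboxes(viable, nchunks=4)
def _chunk_bboxes(bboxes, nchunks):
    # Round-robin rather than slicing, so the shuffle done by
    # get_viable_bboxes keeps expensive queries spread across workers.
    return [bboxes[i::nchunks] for i in range(nchunks)]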
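
# The @retries decorator used throughout is provided elsewhere in hodor;
# the sketch below is one plausible implementation consistent with the
# @retries(10, delay=0.25, backoff=0.25) call signature, treating `backoff`
# as an additive increment between attempts. It is named _retries_sketch
# to avoid shadowing the real decorator, which may differ.
import functools


def _retries_sketch(max_tries, delay=1, backoff=2, exceptions=(Exception,)):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            current_delay = delay
            for attempt in range(1, max_tries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempt == max_tries:
                        raise  # attempts exhausted: surface the last error
                    time.sleep(current_delay)
                    current_delay += backoff
        return wrapper
    return decorator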
def get(ctx, bboxes, where, table_id, feature_store, pkey, debug, debug_store):
    """Sub-process to retrieve all of the features for a given chunk of
    bounding boxes.

    Parameters
    ----------
    ctx : Context
        A Click Context object.
    bboxes : list
        A list of lists of bounding boxes to query.
    where : string
        A where clause in GME's SQL-like query syntax.
    table_id : int
        The GME tableId to query.
    feature_store : Manager.dict()
        The master Manager().dict() object to store retrieved features to.
    pkey : string
        The name of the primary key column in the data source.
    debug : boolean
        Toggles debug mode to load httplib2 monkey patching to record
        request info.
    debug_store : Manager.list()
        The master Manager().list() object to store request details to
        for debugging.
    """
    @retries(10, delay=0.25, backoff=0.25)
    def features_list(request):
        # With the httplib2 monkey patch loaded (debug mode) execute()
        # returns (headers, response); otherwise just the response.
        return request.execute()

    if debug:
        import hodor.httplib2_patch

    pid = multiprocessing.current_process().pid
    if pid not in ctx.thread_safe_services:
        ctx.log("## pid %s getting a new token... ##" % (pid))
        ctx.thread_safe_services[pid] = ctx.get_authenticated_service(ctx.RW_SCOPE)

    thread_start_time = time.time()
    while bboxes:
        features = []
        bbox = bboxes.pop(0)

        resource = ctx.thread_safe_services[pid].tables().features()
        request = resource.list(
            id=table_id,
            maxResults=1000,
            intersects=bbox2poly(*bbox),
            where=where
        )

        page_counter = 0
        resultset_start_time = time.time()
        while request is not None:
            try:
                page_counter += 1
                request_start_time = time.time()

                if debug:
                    headers, response = features_list(request)
                    request_elapsed_time = time.time() - request_start_time
                    debug_store.append((
                        headers['status'],
                        headers['date'],
                        len(response['features']) if headers['status'] == "200" else 0,
                        headers.get('x---stop-time') - request_start_time,
                        request_elapsed_time,
                        ', '.join(str(v) for v in bbox),
                        page_counter,
                        request.uri
                    ))
                else:
                    response = features_list(request)
                    request_elapsed_time = time.time() - request_start_time

                features += response['features']

                # Obey GME's QPS limits
                nap_time = max(0, 1 - request_elapsed_time)
                time.sleep(nap_time)

                request = resource.list_next(request, response)
            except BackendError as e:
                # For 'Deadline exceeded' errors
                ctx.log("pid %s got error '%s' for [%s] after %s pages and %ss. "
                        "Discarded %s features. Splitting and trying again." %
                        (pid, e, ', '.join(str(v) for v in bbox), page_counter,
                         time.time() - resultset_start_time, len(features)))

                request = None
                features = []
                page_counter = 0
                bboxes.extend(bbox2quarters(bbox))  # Split and append to the end
                break
        else:
            # Add new features to the master store
            for f in features:
                if f['properties'][pkey] not in feature_store:
                    feature_store[f['properties'][pkey]] = f

            ctx.log("pid %s retrieved %s features from %s pages in %ss" %
                    (pid, len(features), page_counter,
                     round(time.time() - resultset_start_time, 2)))

    thread_elapsed_time = time.time() - thread_start_time
    ctx.log("pid %s finished chunk in %smins" %
            (pid, round(thread_elapsed_time / 60, 2)))
def get_all_features(ctx, bboxes, where, table_id, feature_store, pkey,
                     debug, debug_store, qps, qps_share):
    """Sub-process to retrieve all of the features for a given chunk of
    bounding boxes.

    Parameters
    ----------
    ctx : Context
        A Click Context object.
    bboxes : list
        A list of lists of bounding boxes to query.
    where : string
        A where clause in GME's SQL-like query syntax.
    table_id : int
        The GME tableId to query.
    feature_store : Manager.dict()
        The master Manager().dict() object to store retrieved features to.
    pkey : string
        The name of the primary key column in the data source.
    debug : boolean
        Toggles debug mode to load httplib2 monkey patching to record
        request info.
    debug_store : Manager.list()
        The master Manager().list() object to store request details to
        for debugging.
    qps : int
        The allowed QPS. Refer to hodor.gme.obey_qps().
    qps_share : int
        Each thread's share of the QPS. Refer to hodor.gme.obey_qps().
    """
    @obey_qps(qps=qps, share=qps_share)
    @retries(10, delay=0.25, backoff=0.25)
    def features_list(request, debug_store=None):
        request_start_time = time.time()
        if debug:
            # With the httplib2 monkey patch loaded, execute() returns
            # (headers, response) rather than just the response.
            headers, response = request.execute()
            request_elapsed_time = time.time() - request_start_time
            debug_store.append((
                headers['status'],
                headers['date'],
                len(response['features']) if headers['status'] == "200" else 0,
                headers.get('x---stop-time') - request_start_time,
                request_elapsed_time,
                ', '.join(str(v) for v in bbox),
                page_counter,
                request.uri
            ))
        else:
            response = request.execute()
        return response

    if debug:
        import hodor.httplib2_patch

    pid = current_process().pid
    thread_start_time = time.time()
    while bboxes:
        features = []
        bbox = bboxes.pop(0)

        resource = ctx.service(ident=current_process().ident).tables().features()
        request = resource.list(
            id=table_id,
            maxResults=1000,
            intersects=bbox2poly(*bbox),
            where=where
        )

        page_counter = 0
        resultset_start_time = time.time()
        while request is not None:
            try:
                page_counter += 1

                if debug:
                    response = features_list(request, debug_store)
                else:
                    response = features_list(request)

                features += response['features']
                request = resource.list_next(request, response)
            except BackendError as e:
                # For 'Deadline exceeded' errors
                ctx.log("pid %s got error '%s' for [%s] after %s pages and %ss. "
                        "Discarded %s features. Splitting and trying again." %
                        (pid, e, ', '.join(str(v) for v in bbox), page_counter,
                         time.time() - resultset_start_time, len(features)))

                request = None
                features = []
                page_counter = 0
                bboxes.extend(bbox2quarters(bbox))  # Split and append to the end
                break
        else:
            # Add new features to the master store
            for f in features:
                if f['properties'][pkey] not in feature_store:
                    feature_store[f['properties'][pkey]] = f

            ctx.log("pid %s retrieved %s features from %s pages in %ss" %
                    (pid, len(features), page_counter,
                     round(time.time() - resultset_start_time, 2)))

    thread_elapsed_time = time.time() - thread_start_time
    ctx.log("pid %s finished chunk in %smins" %
            (pid, round(thread_elapsed_time / 60, 2)))
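
# get_all_features relies on hodor.gme.obey_qps() for rate limiting. The
# sketch below shows one plausible shape for such a decorator, assuming
# `share` is the number of queries per second granted to this worker out
# of the project-wide `qps` budget; hodor's actual implementation may
# apportion the budget differently. Named _obey_qps_sketch to avoid
# shadowing the real decorator.
def _obey_qps_sketch(qps, share):
    # `qps` (the project-wide budget) is unused in this simplified sketch.
    min_interval = 1.0 / max(share, 1)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            start = time.time()
            result = func(*args, **kwargs)
            # Sleep off whatever remains of this call's time slot so the
            # worker never exceeds its share of the allowed QPS.
            time.sleep(max(0, min_interval - (time.time() - start)))
            return result
        return wrapper
    return decorator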
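
# A minimal sketch of a driver that fans the viable bounding boxes out to
# worker processes running get_all_features. Everything here other than
# get_viable_bboxes and get_all_features is hypothetical (the worker count,
# QPS budget, and the assumption that `ctx` can be shared with
# subprocesses); hodor's real entry point may differ.
def _fetch_all_sketch(ctx, table_id, bbox, pkey, where=None, workers=4, qps=10):
    from multiprocessing import Manager, Process

    qps_share = max(1, qps // workers)  # each worker's slice of the budget
    viable = get_viable_bboxes(ctx, table_id, qps_share, bbox, pkey)

    manager = Manager()
    feature_store = manager.dict()  # deduped across workers by primary key
    debug_store = manager.list()

    jobs = []
    for chunk in (viable[i::workers] for i in range(workers)):
        p = Process(target=get_all_features,
                    args=(ctx, list(chunk), where, table_id, feature_store,
                          pkey, False, debug_store, qps, qps_share))
        p.start()
        jobs.append(p)
    for p in jobs:
        p.join()

    return list(feature_store.values())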