def cleanup_endpoint_files(name, query_path, retain_versions=None):
    '''
    Clean up the disk space a certain endpoint uses.

    Parameters
    ----------
    name : str
        The endpoint name. It is validated with ``_check_endpoint_name``
        before anything is touched.

    query_path : str
        Directory that contains one folder per endpoint, named after the
        endpoint.

    retain_versions : int or iterable of int, optional
        Version(s) whose folders are kept. If given, every other
        sub-folder of the endpoint folder is removed (plain files directly
        inside the endpoint folder are left alone). If omitted or empty,
        the whole endpoint folder is removed.
    '''
    _check_endpoint_name(name)
    local_dir = os.path.join(query_path, name)

    # nothing to clean, this is true for state file path where we load
    # Query Object directly from the state path instead of downloading
    # to temporary location
    if not os.path.exists(local_dir):
        return

    if not retain_versions:
        shutil.rmtree(local_dir)
        return

    # A single version number is accepted as shorthand for a one-element
    # list. This check comes after the falsy test above so that every value
    # which used to mean "remove everything" still does.
    if isinstance(retain_versions, int):
        retain_versions = [retain_versions]

    retain_folders = [os.path.join(local_dir, str(version))
                      for version in retain_versions]
    log_info("Retain folder: %s" % retain_folders)

    for file_or_dir in os.listdir(local_dir):
        candidate_dir = os.path.join(local_dir, file_or_dir)
        # Only directories are ever deleted here; stray files are kept.
        if (os.path.isdir(candidate_dir)
                and candidate_dir not in retain_folders):
            shutil.rmtree(candidate_dir)
def _load_object(self, object_uri, object_url, object_version, is_update,
                 object_type):
    '''
    Load one object and store the outcome in ``self.query_objects``.

    For type 'model' the object is obtained from
    ``QueryObject.load(object_url)``; for type 'alias' the ``object_url``
    value itself is stored. Any other type raises a RuntimeError, which is
    caught below like every other failure and recorded as 'LoadFailed'.
    ``is_update`` is only used for logging. Nothing is returned.
    '''
    try:
        log_info(msg="Loading object", uri=object_uri, url=object_url,
                 version=object_version, is_update=is_update)

        if object_type == 'alias':
            loaded = object_url
        elif object_type == 'model':
            loaded = QueryObject.load(object_url)
        else:
            raise RuntimeError('Unknown object type: %s' % object_type)

        self.query_objects[object_uri] = {
            'version': object_version,
            'type': object_type,
            'endpoint_obj': loaded,
            'status': 'LoadSuccessful',
            'last_error': None,
        }
    except Exception as e:
        failure_text = str(e)
        log_error("Unable to load QueryObject", path=object_url,
                  error=failure_text)

        # Overwrite whatever entry existed with a failure record.
        self.query_objects[object_uri] = {
            'version': object_version,
            'type': object_type,
            'endpoint_obj': None,
            'status': 'LoadFailed',
            'last_error': 'Load failed: %s' % failure_text,
        }
def init_model_evaluator(settings):
    '''
    Send a load request for every endpoint the service currently knows.

    Endpoints are read from ``settings['tabpy']`` and one ``LoadObject``
    message per endpoint is handed to ``settings['py_handler']``. Aliases
    are loaded from their 'target'; everything else is loaded from the
    query object path under ``settings['state_file_path']``.
    '''
    tabpy = settings['tabpy']
    py_handler = settings['py_handler']

    existing_pos = tabpy.get_endpoints()
    # Python 2 dictionaries expose the lazy iterator as iteritems().
    if sys.version_info > (3, 0):
        endpoint_items = existing_pos.items()
    else:
        endpoint_items = existing_pos.iteritems()

    for object_name, obj_info in endpoint_items:
        object_version = obj_info['version']
        object_type = obj_info['type']
        # The path is computed for aliases as well, although only
        # non-alias endpoints use it.
        object_path = get_query_object_path(
            settings['state_file_path'], object_name, object_version)

        log_info('Load endpoint: %s, version: %s, type: %s' %
                 (object_name, object_version, object_type))

        if object_type == 'alias':
            load_args = (object_name, obj_info['target'], object_version,
                         False, 'alias')
        else:
            load_args = (object_name, object_path, object_version,
                         False, object_type)

        py_handler.manage_request(LoadObject(*load_args))
def _process_query(self, endpoint_name, start):
    '''
    Parse the request body, resolve the endpoint and run the query.

    Responds with 400 when the body cannot be decoded/parsed/sanitized,
    404 when the endpoint (or the model it resolves to) is unknown, and
    500 when anything else fails while processing. ``start`` is accepted
    but not used in this method.
    '''
    try:
        self._add_CORS_header()

        # NOTE(review): a dict has no decode(), so an empty body appears to
        # end up in the 400 handler below -- confirm this is intended.
        if not self.request.body:
            self.request.body = {}

        # extract request data explicitly for caching purpose
        request_json = self.request.body.decode('utf-8')

        # Sanitize input data
        data = _sanitize_request_data(simplejson.loads(request_json))
    except Exception as e:
        self.error_out(400, format_exception(e, "Invalid Input Data"))
        return

    def report_unknown(missing_name):
        # Log and answer 404 for a name that cannot be resolved.
        log_error("UnknownURI", endpoint_name=missing_name)
        self.error_out(
            404, 'UnknownURI',
            info="Endpoint '%s' does not exist" % missing_name)

    try:
        po_name, _ = self._get_actual_model(endpoint_name)

        # po_name is None if self.py_handler.ps.query_objects.get(
        # endpoint_name) is None
        if not po_name:
            report_unknown(endpoint_name)
            return

        po_obj = self.py_handler.ps.query_objects.get(po_name)
        if not po_obj:
            report_unknown(po_name)
            return

        if po_name != endpoint_name:
            log_info("Querying actual model", po_name=po_name)

        uid = _get_uuid()

        # record query w/ request ID in query log
        qry = Query(po_name, request_json)

        # send a query to PythonService and return
        gls_time, _ = self._handle_result(po_name, data, qry, uid)

        # if error occurred, GLS time is None.
        if not gls_time:
            return
    except Exception as e:
        self.error_out(500, 'Error processing query',
                       info=format_exception(e, 'process query'))
        return
def load_state_from_config_file(ps_state):
    '''
    Fill initial TabPy state from state file path.

    The path is read from the ``TABPY_STATE_PATH`` environment variable,
    the state is loaded with ``util._get_state_from_file`` and handed to
    ``ps_state.set_config``. Any failure (including a missing environment
    variable) is logged and swallowed; nothing is raised or returned.
    '''
    try:
        log_info("Loading state from state file")
        state_file_path = os.environ['TABPY_STATE_PATH']
        config = util._get_state_from_file(state_file_path)
        ps_state.set_config(config)
    except Exception as e:
        # Exceptions carry no ``message`` attribute on Python 3, so reading
        # it here would raise inside the handler and hide the real error.
        log_error("Unable to get state file: %s" % str(e))
def load_object(self, object_uri, object_url, object_version, is_update,
                object_type):
    '''
    Schedule loading of an object unless it is already loaded.

    Returns ``DownloadSkipped`` when an entry with a loaded object and a
    version greater than or equal to ``object_version`` exists. Otherwise
    the entry is marked 'LoadInProgress', ``self._load_object`` is
    submitted to ``self.EXECUTOR`` and ``LoadInProgress`` is returned.
    If anything raises, the entry is replaced by a 'LoadFailed' record
    and ``LoadFailed`` is returned.
    '''
    try:
        current = self.query_objects.get(object_uri)
        already_loaded = (current
                          and current['endpoint_obj']
                          and current['version'] >= object_version)
        if already_loaded:
            log_info("Received load message for object already loaded")
            return DownloadSkipped(
                object_uri, current['version'],
                "Object with greater or equal version already loaded")

        if object_uri in self.query_objects:
            # Keep the existing entry (and its object); only flip status.
            self.query_objects[object_uri]['status'] = 'LoadInProgress'
        else:
            self.query_objects[object_uri] = {
                'version': object_version,
                'type': object_type,
                'endpoint_obj': None,
                'status': 'LoadInProgress',
                'last_error': None,
            }

        self.EXECUTOR.submit(
            self._load_object, object_uri, object_url,
            object_version, is_update, object_type)

        return LoadInProgress(
            object_uri, object_url, object_version, is_update,
            object_type)
    except Exception as e:
        failure_text = str(e)
        log_error("Unable to load QueryObject", path=object_url,
                  error=failure_text)

        self.query_objects[object_uri] = {
            'version': object_version,
            'type': object_type,
            'endpoint_obj': None,
            'status': 'LoadFailed',
            'last_error': failure_text,
        }
        return LoadFailed(object_uri, object_version, failure_text)
def manage_request(self, msg):
    '''
    Dispatch a request message to the matching ``self.ps`` operation.

    ``LoadObject`` is unpacked positionally into ``load_object``;
    ``DeleteObjects`` passes ``msg.uris``; ``FlushObjects``,
    ``CountObjects`` and ``ListObjects`` call their argument-less
    counterparts. Any other message type yields ``UnknownMessage(msg)``.

    Exceptions are not propagated: they are logged and an
    ``UnknownMessage`` carrying the error text is returned instead.
    '''
    try:
        log_info("Received request", request_type=type(msg).__name__)

        if isinstance(msg, LoadObject):
            response = self.ps.load_object(*msg)
        elif isinstance(msg, DeleteObjects):
            response = self.ps.delete_objects(msg.uris)
        elif isinstance(msg, FlushObjects):
            response = self.ps.flush_objects()
        elif isinstance(msg, CountObjects):
            response = self.ps.count_objects()
        elif isinstance(msg, ListObjects):
            response = self.ps.list_objects()
        else:
            response = UnknownMessage(msg)

        return response
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3; reading it
        # here would raise from inside the handler and lose the response.
        error_text = str(e)
        log_error("Error processing request", error=error_text)
        return UnknownMessage(error_text)
def wait_for_endpoint_loaded(py_handler, object_uri):
    '''
    Block until the given object is no longer in 'LoadInProgress' state.

    The object list is requested from ``py_handler`` every 0.1 seconds.
    The function returns once the entry for ``object_uri`` reports any
    other status, or as soon as the list request does not come back as an
    ``ObjectList`` (which is logged as an error). There is no timeout: if
    ``object_uri`` never shows up in the list, polling continues.
    '''
    log_info('Waiting for object to be loaded...')

    while True:
        reply = py_handler.manage_request(ListObjects())
        if not isinstance(reply, ObjectList):
            log_error("Error loading endpoint %s: %s" %
                      (object_uri, reply))
            return

        # Python 2 dictionaries expose the lazy iterator as iteritems().
        if sys.version_info > (3, 0):
            entries = reply.objects.items()
        else:
            entries = reply.objects.iteritems()

        for uri, info in entries:
            if uri != object_uri:
                continue
            if info['status'] == 'LoadInProgress':
                continue
            log_info("Object load status: %s" % info['status'])
            return

        sleep(0.1)
def put(self, name):
    '''
    Update an existing endpoint from the JSON request body.

    Responds with 400 for an empty or undecodable body, 404 when the
    endpoint does not exist, 400 when ``_add_or_update_endpoint`` reports
    an error message, and 500 for any unexpected exception. On success the
    refreshed endpoint info is written to the response. The new version is
    the current version plus one.

    The body contains a ``yield``, so this is a generator function.
    '''
    try:
        if not self.request.body:
            self.error_out(400, "Input body cannot be empty")
            self.finish()
            return

        try:
            request_data = simplejson.loads(
                self.request.body.decode('utf-8'))
        except Exception:
            # Narrowed from a bare ``except:`` so that interpreter-level
            # signals (KeyboardInterrupt, SystemExit) are not reported as
            # a client error.
            self.error_out(400, "Failed to decode input body")
            self.finish()
            return

        # check if endpoint exists
        endpoints = self.tabpy.get_endpoints(name)
        if not endpoints:
            self.error_out(404, "endpoint %s does not exist." % name)
            self.finish()
            return

        new_version = int(endpoints[name]['version']) + 1
        log_info('Endpoint info: %s' % request_data)

        err_msg = yield self._add_or_update_endpoint(
            'update', name, new_version, request_data)
        if err_msg:
            self.error_out(400, err_msg)
            self.finish()
        else:
            self.write(self.tabpy.get_endpoints(name))
            self.finish()
    except Exception as e:
        err_msg = format_exception(e, 'update_endpoint')
        self.error_out(500, err_msg)
        self.finish()
def on_state_change(settings):
    '''
    Reload the state file and apply endpoint changes to the service.

    The state is read from ``settings['state_file_path']`` and compared
    with the running service via ``_get_latest_service_state``. For every
    changed endpoint: an entry with neither path nor version is treated as
    a removal (delete request plus file cleanup); anything else is loaded,
    waited for, and -- for versions above 2 -- older version folders are
    cleaned up, keeping the current and the previous version.

    Exceptions are logged as a warning and not propagated.
    '''
    try:
        py_handler = settings['py_handler']

        log_info("Loading state from state file")
        config = util._get_state_from_file(settings['state_file_path'])
        new_ps_state = TabPyState(config=config)

        has_changes, changes = _get_latest_service_state(
            settings, new_ps_state)
        if not has_changes:
            log_info("Nothing changed, return.")
            return

        new_endpoints = new_ps_state.get_endpoints()
        endpoint_changes = changes['endpoints']

        for object_name in endpoint_changes:
            object_type, object_version, object_path = \
                endpoint_changes[object_name]

            if not (object_path or object_version):
                # removal
                log_info("Removing object", uri=object_name)
                py_handler.manage_request(DeleteObjects([object_name]))
                cleanup_endpoint_files(object_name,
                                       settings['upload_dir'])
                continue

            endpoint_info = new_endpoints[object_name]
            is_update = object_version > 1

            if object_type == 'alias':
                load_args = (object_name, endpoint_info['target'],
                             object_version, is_update, 'alias')
            else:
                load_args = (object_name, object_path,
                             object_version, is_update, object_type)

            py_handler.manage_request(LoadObject(*load_args))
            wait_for_endpoint_loaded(py_handler, object_name)

            # cleanup old version of endpoint files
            if object_version > 2:
                cleanup_endpoint_files(
                    object_name, settings['upload_dir'],
                    [object_version, object_version - 1])
    except Exception as e:
        log_warning("Error submitting update model request",
                    error=format_exception(e, 'on_state_change'))
def get_config():
    """Build the server settings dictionary and the URL subdirectory.

    Settings are resolved in this order:

    1. CLI arguments, if present - port only,
    2. the ``tabpy_server.common.config`` module, when it can be imported
       and defines the setting,
    3. OS environment variables,
    4. built-in defaults.

    The config module and the environment use the same variable names: all
    capitals, prefixed with 'TABPY_'.

    Side effects: the upload directory is created when missing, and
    ``state.ini`` is copied from ``./state.ini.template`` into the state
    directory when it does not exist there yet.

    Returns a ``(settings, subdirectory)`` tuple; ``subdirectory`` is an
    empty string unless the state file has a "Subdirectory" option in its
    "Service Info" section.
    """
    try:
        import tabpy_server.common.config as config
    except ImportError:
        config = None

    def _lookup(setting_name, fallback):
        # Config module first, then environment, then the default. When
        # the module is absent ``config`` is None, which raises the same
        # AttributeError as a missing attribute would.
        try:
            return getattr(config, setting_name)
        except AttributeError:
            return os.getenv(setting_name, fallback)

    settings = {}

    cli_args = parse_arguments()
    if cli_args.port is None:
        settings['port'] = _lookup('TABPY_PORT', 9004)
    else:
        settings['port'] = cli_args.port

    settings['server_version'] = _lookup('TABPY_SERVER_VERSION', 'Alpha')
    settings['bind_ip'] = _lookup('TABPY_BIND_IP', '0.0.0.0')

    settings['upload_dir'] = _lookup('TABPY_QUERY_OBJECT_PATH',
                                     '/tmp/query_objects')
    if not os.path.exists(settings['upload_dir']):
        os.makedirs(settings['upload_dir'])

    _state_file_path = _lookup('TABPY_STATE_PATH', './')
    settings['state_file_path'] = os.path.realpath(
        os.path.normpath(os.path.expanduser(_state_file_path)))

    # if state.ini does not exist try and create it - remove last dependence
    # on batch/shell script
    state_ini = '{}/state.ini'.format(settings['state_file_path'])
    if not os.path.isfile(state_ini):
        shutil.copy('./state.ini.template', state_ini)

    log_info("Loading state from state file")
    tabpy_state = _get_state_from_file(settings['state_file_path'])
    settings['tabpy'] = TabPyState(config=tabpy_state)

    settings['py_handler'] = PythonServiceHandler(PythonService())
    settings['compress_response'] = True if TORNADO_MAJOR >= 4 else "gzip"
    settings['static_path'] = os.path.join(os.path.dirname(__file__),
                                           "static")

    # Set subdirectory from config if applicable
    subdirectory = ""
    if tabpy_state.has_option("Service Info", "Subdirectory"):
        subdirectory = "/" + tabpy_state.get("Service Info", "Subdirectory")

    return settings, subdirectory
def post(self):
    '''
    Wrap the posted script in a function and hand it off for evaluation.

    The JSON body must contain 'script'; an optional 'data' entry must be
    a dictionary whose keys are exactly _arg1 .. _argN. The script lines
    are indented under a generated ``_user_script(tabpy, ...)`` definition
    and passed, together with the arguments, to ``self.call_subprocess``.

    Responds with 400 for a missing script, malformed arguments or a
    ``None`` result; 404 when the failure is exactly a KeyError on
    'response'; 500 for every other exception.

    The body contains a ``yield``, so this is a generator function.
    '''
    try:
        body = simplejson.loads(self.request.body.decode('utf-8'))
        if 'script' not in body:
            self.error_out(400, 'Script is empty.')
            return

        # Transforming user script into a proper function.
        user_code = body['script']
        arguments = body['data'] if 'data' in body else None
        arguments_str = ''

        if arguments is not None:
            if not isinstance(arguments, dict):
                self.error_out(
                    400,
                    'Script parameters need to be provided as a dictionary.')
                return

            arguments_expected = ['_arg' + str(i)
                                  for i in range(1, len(arguments) + 1)]
            if sorted(arguments_expected) != sorted(arguments.keys()):
                self.error_out(
                    400,
                    'Variables names should follow the format '
                    '_arg1, _arg2, _argN')
                return

            # Parameter order follows the dictionary's own key order.
            arguments_str = ', ' + ', '.join(arguments.keys())

        signature = 'def _user_script(tabpy' + arguments_str + '):\n'
        indented_body = ''.join(' ' + line + '\n'
                                for line in user_code.splitlines())
        function_to_evaluate = signature + indented_body
        log_info("function to evaluate=%s" % function_to_evaluate)

        result = yield self.call_subprocess(function_to_evaluate,
                                            arguments)
        if result is not None:
            self.write(simplejson.dumps(result))
            self.finish()
        else:
            self.error_out(400, 'Error running script. No return value')
    except Exception as e:
        err_msg = "%s : %s" % (e.__class__.__name__, str(e))

        # The "no response" case is recognised purely by its rendered text.
        if err_msg == "KeyError : 'response'":
            self.error_out(
                404, 'Error processing script',
                info="The endpoint you're trying to query did not respond. "
                     "Please make sure the endpoint exists and the correct "
                     "set of arguments are provided.")
        else:
            err_msg = format_exception(e, 'POST /evaluate')
            self.error_out(500, 'Error processing script', info=err_msg)