def handle_cli(self, args, func):
    """Run the API function against a dataframe supplied on the command line.

    Args:
        args: Argument strings. ``--input`` is required; ``-o/--output``
            (``str`` or ``json``), ``--orient`` and ``--output_orient`` are
            optional.
        func: Callable invoked with the parsed dataframe; its result is
            printed to stdout.

    Raises:
        BadInput: If the input is a file/URL whose name ends in neither
            ``.csv`` nor ``.json``, or if an inline input string cannot be
            parsed as JSON.
    """
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument("--input", required=True)
    cli_parser.add_argument(
        "-o", "--output", default="str", choices=["str", "json"]
    )
    cli_parser.add_argument(
        "--orient",
        default=self.orient,
        choices=PANDAS_DATAFRAME_TO_DICT_ORIENT_OPTIONS,
    )
    cli_parser.add_argument(
        "--output_orient",
        default=self.output_orient,
        choices=PANDAS_DATAFRAME_TO_DICT_ORIENT_OPTIONS,
    )
    options = cli_parser.parse_args(args)
    source = options.input

    def _read_json():
        return pd.read_json(
            source, orient=options.orient, typ=self.typ, dtype=False
        )

    points_to_location = (
        os.path.isfile(source) or is_s3_url(source) or is_url(source)
    )
    if not points_to_location:
        # Anything that is not a local file, S3 URL or URL is taken to be an
        # inline JSON document.
        try:
            df = _read_json()
        except ValueError as e:
            raise BadInput(
                "Unexpected input format, BentoML DataframeHandler expects json "
                "string as input: {}".format(e))
    elif source.endswith(".csv"):
        df = pd.read_csv(source)
    elif source.endswith(".json"):
        df = _read_json()
    else:
        raise BadInput(
            "Input file format not supported, BentoML cli only accepts .json "
            "and .csv file")

    if self.typ == "frame" and self.input_dtypes is not None:
        _check_dataframe_column_contains(self.input_dtypes, df)

    outcome = func(df)
    if parsed_output_is_json := (options.output == 'json'):
        rendered = api_func_result_to_json(
            outcome, pandas_dataframe_orient=options.output_orient)
    else:
        rendered = str(outcome)
    print(rendered)
def _dataframe_csv_from_input(tables, content_types, orients):
    """Yield ``(line, table_id_or_None)`` pairs for a batch of JSON/CSV tables.

    Args:
        tables: Iterable of encoded payloads; each one is decoded as UTF-8.
        content_types: Iterable of content types aligned with ``tables``. A
            falsy entry is treated as ``"application/json"``.
        orients: Iterable of JSON orients aligned with ``tables``. A falsy
            entry triggers auto-detection via ``_detect_orient``.

    Yields:
        Tuples of ``(line, table_id)`` where ``table_id`` is replaced by
        ``None`` whenever ``state.line_num`` is falsy at the time of the yield.

    Raises:
        BadInput: For an unsupported content type, an undetectable JSON
            orient, or JSON that the selected orient parser fails on.
        NotImplementedError: If the orient is not a key of ``_ORIENT_MAP``.
    """
    # One state object is shared by every table in the batch.
    state = DataFrameState()
    for table_id, (table, content_type, orient) in enumerate(
        zip(tables, content_types, orients)
    ):
        content_type = content_type or "application/json"
        if content_type.lower() == "application/json":
            # Keep order when loading data
            if sys.version_info >= (3, 6):
                table = json.loads(table.decode('utf-8'))
            else:
                table = json.loads(
                    table.decode('utf-8'), object_pairs_hook=collections.OrderedDict
                )
        elif content_type.lower() == "text/csv":
            table = _csv_split(table.decode('utf-8'), '\n')
            # An empty CSV payload contributes no lines at all.
            if not table:
                continue
        else:
            raise BadInput(f'Invalid content_type for DataframeInput: {content_type}')

        if content_type.lower() == "application/json":
            if not orient:
                orient = _detect_orient(table)

            if not orient:
                raise BadInput(
                    'Unable to detect Json orient, please specify the format orient.'
                )

            if orient not in _ORIENT_MAP:
                raise NotImplementedError(
                    f'Json orient "{orient}" is not supported now'
                )

            _from_json = _ORIENT_MAP[orient]

            try:
                for line in _from_json(state, table):
                    # Parses as (line, (table_id if state.line_num else None)).
                    # NOTE(review): presumably line_num is 0 only for the
                    # header line -- confirm against DataFrameState.
                    yield line, table_id if state.line_num else None
            except Exception as e:  # pylint:disable=broad-except
                # Any parser failure is reported as BadInput; if another
                # orient can be guessed, the message suggests it.
                guessed_orient = _guess_orient(table)
                if guessed_orient:
                    raise BadInput(
                        f'Not a valid "{orient}" oriented Json. '
                        f'The orient seems to be "{guessed_orient}". '
                        f'Try DataframeInput(orient="{guessed_orient}") instead.'
                    ) from e
                else:
                    raise BadInput(f'Not a valid "{orient}" oriented Json. ') from e

            continue
        elif content_type.lower() == "text/csv":
            for line in _from_csv_without_index(state, table):
                yield line, table_id if state.line_num else None
def handle_aws_lambda_event(self, event, func):
    """Decode a base64 image from an AWS Lambda event and run ``func`` on it.

    The decoded image goes through ``self.after_open`` (if set), is converted
    with ``fastai_vision.pil2tensor``, optionally divided by 255 in place
    (``self.div``), and finally wrapped by ``self.cls`` or
    ``fastai_vision.Image`` before being passed to ``func``.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries.
        func: Callable invoked with the prepared image object.

    Returns:
        dict with ``statusCode`` 200 and the serialized result as ``body``.

    Raises:
        BadInput: If the Content-Type header is missing or is not an image
            type.
    """
    content_type = event["headers"].get("Content-Type") or ""
    # "image/" is the registered MIME top-level type; the historical
    # "images/" prefix is still accepted so existing callers keep working.
    if not content_type.startswith(("image/", "images/")):
        # Report the value already read instead of indexing the header again,
        # which raised KeyError when the header was absent.
        raise BadInput(
            "BentoML currently doesn't support Content-Type: {content_type} for "
            "AWS Lambda".format(content_type=content_type)
        )

    # b64decode accepts both ASCII str and bytes bodies; decodebytes only
    # accepts bytes-like objects.
    image_data = self.imread(
        base64.b64decode(event["body"]), pilmode=self.pilmode
    )

    if self.after_open:
        image_data = self.after_open(image_data)

    image_data = self.fastai_vision.pil2tensor(image_data, np.float32)

    if self.div:
        image_data = image_data.div_(255)

    if self.cls:
        image_data = self.cls(image_data)
    else:
        image_data = self.fastai_vision.Image(image_data)

    result = func(image_data)
    result = get_output_str(result, event["headers"].get("output", "json"))
    return {"statusCode": 200, "body": result}
def handle_request(self, request, func):
    """Parse an HTTP request body into a dataframe and return a JSON response.

    A ``text/csv`` content type is read as CSV; every other content type is
    optimistically parsed as JSON using ``self.orient`` and ``self.typ``.

    Args:
        request: Request object exposing ``content_type`` and ``data``.
        func: Callable invoked with the parsed dataframe.

    Returns:
        A ``Response`` with status 200 and mimetype ``application/json``.

    Raises:
        BadInput: If the non-CSV body cannot be parsed as JSON.
    """
    if request.content_type != "text/csv":
        # Optimistically assuming Content-Type to be "application/json"
        try:
            frame = pd.read_json(
                request.data.decode("utf-8"),
                orient=self.orient,
                typ=self.typ,
                dtype=False,
            )
        except ValueError:
            raise BadInput(
                "Failed parsing request data, only Content-Type application/json "
                "and text/csv are supported in BentoML DataframeHandler")
    else:
        frame = pd.read_csv(StringIO(request.data.decode('utf-8')))

    must_check_columns = self.typ == "frame" and self.input_dtypes is not None
    if must_check_columns:
        _check_dataframe_column_contains(self.input_dtypes, frame)

    body = api_func_result_to_json(
        func(frame), pandas_dataframe_orient=self.output_orient)
    return Response(response=body, status=200, mimetype="application/json")
def handle_request(self, request, func):
    """Parse a JSON or CSV HTTP request into a dataframe and run ``func``.

    Args:
        request: Request object exposing ``content_type`` and ``data``.
        func: Callable invoked with the parsed dataframe.

    Returns:
        A ``Response`` with status 200 and mimetype ``application/json``.

    Raises:
        BadInput: If the content type is neither ``application/json`` nor
            ``text/csv``.
    """
    if request.content_type == "application/json":
        frame = pd.read_json(
            request.data.decode("utf-8"),
            orient=self.orient,
            typ=self.typ,
            dtype=False,
        )
    elif request.content_type == "text/csv":
        frame = pd.read_csv(StringIO(request.data.decode('utf-8')))
    else:
        raise BadInput(
            "Request content-type not supported, only application/json and "
            "text/csv are supported")

    must_check_columns = self.typ == "frame" and self.input_dtypes is not None
    if must_check_columns:
        _check_dataframe_column_contains(self.input_dtypes, frame)

    body = api_func_result_to_json(
        func(frame), pandas_dataframe_orient=self.output_orient)
    return Response(response=body, status=200, mimetype="application/json")
def _dataframe_csv_from_input(raws, content_types):
    """Yield ``(csv_line, source_index_or_None)`` pairs for JSON/CSV payloads.

    Each payload is decoded as UTF-8. JSON payloads may be a list of rows or
    a dict keyed by column name; CSV payloads may or may not carry a leading
    index column. Header lines are yielded with ``None`` as the second item,
    data lines with the position of the payload in ``raws``.

    Args:
        raws: Iterable of encoded payloads.
        content_types: Iterable of content types aligned with ``raws``; a
            falsy entry is treated as JSON.

    Raises:
        BadInput: If a content type is neither JSON nor ``text/csv``.
    """
    # Running row counter written as the first CSV cell of every data line;
    # -1 means that no header has been emitted yet.
    n_row_sum = -1
    for i, (data, content_type) in enumerate(zip(raws, content_types)):
        if not content_type or content_type.lower() == "application/json":
            if sys.version_info >= (3, 6):
                od = json.loads(data.decode('utf-8'))
            else:
                od = json.loads(
                    data.decode('utf-8'),
                    # preserve order
                    object_pairs_hook=collections.OrderedDict,
                )

            if isinstance(od, list):
                if n_row_sum == -1:  # make header
                    # Column names are the positions 0..len(first row) - 1.
                    yield ",".join(
                        itertools.chain(('',), map(str, range(len(od[0]))))
                    ), None
                    n_row_sum += 1

                for n_row, datas_row in enumerate(od):
                    yield ','.join(
                        itertools.chain((str(n_row_sum),), map(_to_csv_cell, datas_row))
                    ), i
                    n_row_sum += 1
            elif isinstance(od, dict):
                if n_row_sum == -1:  # make header
                    # Iterating the dict yields its keys, used as column names.
                    yield ",".join(itertools.chain(('',), map(_to_csv_cell, od))), None
                    n_row_sum += 1

                # Row labels are taken from the first column's mapping.
                for n_row, name_row in enumerate(next(iter(od.values()))):
                    datas_row = (
                        od[name_col][name_row] for n_col, name_col in enumerate(od)
                    )
                    yield ','.join(
                        itertools.chain((str(n_row_sum),), map(_to_csv_cell, datas_row))
                    ), i
                    n_row_sum += 1
        elif content_type.lower() == "text/csv":
            data_str = data.decode('utf-8')
            row_strs = data_str.split('\n')
            # NOTE(review): str.split always returns at least one element, so
            # this guard never triggers.
            if not row_strs:
                continue
            if row_strs[0].strip().startswith(','):  # csv with index column
                # NOTE(review): unlike the JSON branches, n_row_sum is not
                # incremented after the header here, so the first CSV data
                # row is numbered -1 -- confirm whether that is intended.
                if n_row_sum == -1:
                    yield row_strs[0], None
                for row_str in row_strs[1:]:
                    if not row_str.strip():  # skip blank line
                        continue
                    # Replace the payload's own index cell with the running
                    # counter.
                    yield f"{str(n_row_sum)},{row_str.split(',', maxsplit=1)[1]}", i
                    n_row_sum += 1
            else:
                if n_row_sum == -1:
                    # Prepend an empty header cell for the generated index.
                    yield "," + row_strs[0], None
                for row_str in row_strs[1:]:
                    if not row_str.strip():  # skip blank line
                        continue
                    yield f"{str(n_row_sum)},{row_str.strip()}", i
                    n_row_sum += 1
        else:
            raise BadInput(f'Invalid content_type for DataframeHandler: {content_type}')
def read_dataframes_from_json_n_csv(
    datas: Iterable["pd.DataFrame"],
    content_types: Iterable[str],
    orient: str = None,
) -> ("pd.DataFrame", Iterable[slice]):
    '''
    Load dataframes from multiple raw inputs in JSON or CSV format, efficiently.

    Background: each call to pandas.read_csv or pandas.read_json costs about
    100ms, no matter how many lines it contains. The inputs are therefore
    merged into one CSV document and parsed with a single read_csv call.

    Returns the merged dataframe and the slices computed by ``_gen_slice``
    from the per-row table ids (the id of the first row is dropped before
    slicing -- presumably the header row, confirm against the row generator).

    Raises MissingDependencyException when pandas is unavailable and BadInput
    when row generation fails with TypeError or ValueError.
    '''
    if not pd:
        raise MissingDependencyException('pandas required')

    csv_lines = []
    table_ids = []
    try:
        row_source = _dataframe_csv_from_input(
            datas, content_types, itertools.repeat(orient)
        )
        for cells, table_id in row_source:
            # Rows arrive either as a ready-made string or as a sequence of
            # cells that still has to be quoted and joined.
            if isinstance(cells, str):
                csv_lines.append(cells)
            else:
                csv_lines.append(','.join(map(_csv_quote, cells)))
            table_ids.append(table_id)
    except (TypeError, ValueError) as e:
        raise BadInput('Invalid input format for DataframeInput') from e

    merged_csv = StringIO('\n'.join(csv_lines))
    df_merged = pd.read_csv(merged_csv, index_col=None)
    slices = _gen_slice(table_ids[1:])
    return df_merged, slices
def handle_aws_lambda_event(self, event, func):
    """Parse an AWS Lambda event body into a dataframe and run ``func`` on it.

    A ``text/csv`` Content-Type is read as CSV; anything else is
    optimistically parsed as JSON using ``self.orient`` and ``self.typ``.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries.
        func: Callable invoked with the parsed dataframe.

    Returns:
        dict with ``statusCode`` 200 and the JSON result as ``body``; an
        ``Access-Control-Allow-Origin`` header is added when ``self.cors`` is
        truthy.

    Raises:
        BadInput: If the non-CSV body cannot be parsed as JSON.
    """
    # Imported locally so this method does not depend on a module-level import.
    from io import StringIO

    if event["headers"].get("Content-Type", None) == "text/csv":
        # The body holds CSV text; read_csv would treat a bare string as a
        # file path, so wrap it in a file-like object.
        df = pd.read_csv(StringIO(event["body"]))
    else:
        # Optimistically assuming Content-Type to be "application/json"
        try:
            df = pd.read_json(
                event["body"], orient=self.orient, typ=self.typ, dtype=False
            )
        except ValueError as e:
            raise BadInput(
                "Failed parsing request data, only Content-Type application/json "
                "and text/csv are supported in BentoML DataframeHandler"
            ) from e

    if self.typ == "frame" and self.input_dtypes is not None:
        _check_dataframe_column_contains(self.input_dtypes, df)

    result = func(df)
    result = api_func_result_to_json(
        result, pandas_dataframe_orient=self.output_orient)

    response = {"statusCode": 200, "body": result}
    # Allow disabling CORS by setting it to None
    if self.cors:
        response["headers"] = {"Access-Control-Allow-Origin": self.cors}
    return response
def handle_request(self, request: flask.Request, func):
    """Serve one JSON HTTP request through the batch-request code path.

    Args:
        request: Incoming flask request; its content type must be exactly
            ``application/json``.
        func: Callable forwarded to ``self.handle_batch_request``.

    Returns:
        The flask response built from the first batch response.

    Raises:
        BadInput: If the content type is not ``application/json``.
    """
    if request.content_type == "application/json":
        # Wrap the single request into a one-element batch.
        batch = [SimpleRequest.from_flask_request(request)]
        responses = self.handle_batch_request(batch, func)
        return responses[0].to_flask_response()

    raise BadInput(
        "Request content-type must be 'application/json' for this "
        "BentoService API")
def handle_aws_lambda_event(self, event, func):
    """Run ``func`` on the JSON body of an AWS Lambda event.

    The parsed body is passed to ``func`` as a one-element list and the first
    element of the returned sequence is handed to the output adapter.

    Args:
        event: Lambda event mapping with a ``"body"`` entry holding JSON text.
        func: Callable taking a list of parsed payloads and returning an
            indexable collection of results.

    Returns:
        Whatever ``self.output_adapter.to_aws_lambda_event`` returns.

    Raises:
        BadInput: If the body is not valid JSON.
    """
    try:
        payload = json.loads(event["body"])
    except JSONDecodeError:
        raise BadInput("Request body must contain valid json")

    batch_results = func([payload])
    first_result = batch_results[0]
    return self.output_adapter.to_aws_lambda_event(first_result, event)
def check_dataframe_column_contains(required_column_names, df):
    """Ensure every required column name is present in ``df``.

    Column names are compared as strings on both sides, because the
    dataframe's column labels are stringified before the comparison.

    Args:
        required_column_names: Iterable of required column names (iterating a
            dict yields its keys).
        df: Object exposing a ``columns`` iterable.

    Raises:
        BadInput: If at least one required column is missing. The message
            lists the missing columns and the full list of required columns.
    """
    df_columns = set(map(str, df.columns))
    required = [str(col) for col in required_column_names]
    # dict.fromkeys de-duplicates while keeping the caller's order, so the
    # error message is deterministic.
    missing = [col for col in dict.fromkeys(required) if col not in df_columns]
    if missing:
        raise BadInput(
            "Missing columns: {}, required_column:{}".format(
                ",".join(missing), required
            )
        )
def verify_image_format_or_raise(file_name: str, accept_format_list: [str]):
    """
    Raise BadInput unless the file's lower-cased extension (as returned by
    os.path.splitext, including the leading dot) is in accept_format_list
    """
    extension = os.path.splitext(file_name)[1].lower()
    if extension in accept_format_list:
        return

    raise BadInput(
        "Input file not in supported format list: {}".format(accept_format_list)
    )
def handle_request(self, request: flask.Request, func):
    """Run ``func`` on the JSON body of an HTTP request.

    Args:
        request: Incoming flask request. Its content type is compared
            case-insensitively with ``application/json``.
        func: Callable invoked with the parsed JSON value.

    Returns:
        Whatever ``self.output_adapter.to_response`` returns.

    Raises:
        BadInput: If the content type is missing or not ``application/json``,
            or if the body is not valid JSON.
    """
    # content_type may be None when the header is absent; treat it as empty
    # instead of failing on ``None.lower()``.
    content_type = request.content_type or ""
    if content_type.lower() != "application/json":
        raise BadInput(
            "Request content-type must be 'application/json' for this "
            "BentoService API lambda endpoint")

    try:
        parsed_json = json.loads(request.get_data(as_text=True))
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        raise BadInput("Request body must contain valid json") from e

    result = func(parsed_json)
    return self.output_adapter.to_response(result, request)
def handle_request(self, request, func):
    """Run ``func`` on the JSON body of an HTTP request.

    Args:
        request: Request object exposing ``content_type`` and ``data``
            (bytes, decoded as UTF-8).
        func: Callable invoked with the parsed JSON value.

    Returns:
        Whatever ``self.output_adapter.to_response`` returns.

    Raises:
        BadInput: If the content type is not exactly ``application/json``, or
            if the body is not valid UTF-8 encoded JSON.
    """
    if request.content_type != "application/json":
        raise BadInput(
            "Request content-type must be 'application/json' for this "
            "BentoService API")

    try:
        parsed_json = json.loads(request.data.decode("utf-8"))
    except ValueError as e:
        # Covers json.JSONDecodeError and UnicodeDecodeError, both of which
        # are ValueError subclasses; a malformed body is a client error.
        raise BadInput("Request body must contain valid json") from e

    result = func(parsed_json)
    return self.output_adapter.to_response(result, request)
def handle_aws_lambda_event(self, event, func):
    """Run ``func`` on the JSON body of an AWS Lambda event.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries.
        func: Callable invoked with the parsed JSON value.

    Returns:
        Whatever ``self.output_adapter.to_aws_lambda_event`` returns.

    Raises:
        BadInput: If the Content-Type header is missing or not exactly
            ``application/json``, or if the body is not valid JSON.
    """
    # .get() so that a missing header is reported as BadInput instead of
    # escaping as KeyError.
    if event["headers"].get("Content-Type") != "application/json":
        raise BadInput(
            "Request content-type must be 'application/json' for this "
            "BentoService API lambda endpoint")

    try:
        parsed_json = json.loads(event["body"])
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        raise BadInput("Request body must contain valid json") from e

    result = func(parsed_json)
    return self.output_adapter.to_aws_lambda_event(result, event)
def read_json_file(json_file):
    """
    Parse the given JSON file object and return the resulting Python object.

    json_file can be any text or binary file that supports .read(). A file
    that fails to decode or parse is reported as BadInput.
    """
    try:
        return json.load(json_file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        raise BadInput("BentoML#AnnotatedImageInput received invalid JSON file")
def handle_aws_lambda_event(self, event, func):
    """Run ``func`` on the JSON body of an AWS Lambda event.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries.
        func: Callable invoked with the parsed JSON value.

    Returns:
        dict with ``statusCode`` 200 and the JSON-serialized result as
        ``body``.

    Raises:
        BadInput: If the Content-Type header is missing or not exactly
            ``application/json``, or if the body is not valid JSON.
    """
    # .get() so that a missing header is reported as BadInput instead of
    # escaping as KeyError.
    if event["headers"].get("Content-Type") != "application/json":
        raise BadInput(
            "Request content-type must be 'application/json' for this "
            "BentoService API lambda endpoint")

    try:
        parsed_json = json.loads(event["body"])
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        raise BadInput("Request body must contain valid json") from e

    result = func(parsed_json)
    json_output = api_func_result_to_json(result)
    return {"statusCode": 200, "body": json_output}
def handle_aws_lambda_event(self, event, func):
    """Decode a base64 image from an AWS Lambda event and run ``func`` on it.

    The decoded image is passed to ``func`` inside a one-element tuple and the
    first element of the returned sequence is handed to the output adapter.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries;
            the body holds the base64-encoded image.
        func: Callable taking a tuple of images and returning an indexable
            collection of results.

    Returns:
        Whatever ``self.output_adapter.to_aws_lambda_event`` returns.

    Raises:
        BadInput: If the Content-Type header is missing or is not an image
            type.
    """
    content_type = event["headers"].get("Content-Type") or ""
    # "image/" is the registered MIME top-level type; the historical
    # "images/" prefix is still accepted so existing callers keep working.
    if not content_type.startswith(("image/", "images/")):
        # Report the value already read instead of indexing the header again,
        # which raised KeyError when the header was absent.
        raise BadInput(
            "BentoML currently doesn't support Content-Type: {content_type} for "
            "AWS Lambda".format(content_type=content_type)
        )

    # b64decode accepts both ASCII str and bytes bodies; decodebytes only
    # accepts bytes-like objects.
    image = self.imread(base64.b64decode(event["body"]), pilmode=self.pilmode)

    result = func((image,))[0]
    return self.output_adapter.to_aws_lambda_event(result, event)
def handle_aws_lambda_event(self, event, func):
    """Decode a base64 image from an AWS Lambda event and run ``func`` on it.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries;
            the body holds the base64-encoded image. An optional ``output``
            header selects the output format (default ``"json"``).
        func: Callable invoked with the decoded image.

    Returns:
        dict with ``statusCode`` 200 and the serialized result as ``body``.

    Raises:
        BadInput: If the Content-Type header is missing or is not an image
            type.
    """
    content_type = event["headers"].get("Content-Type") or ""
    # "image/" is the registered MIME top-level type; the historical
    # "images/" prefix is still accepted so existing callers keep working.
    if not content_type.startswith(("image/", "images/")):
        # Report the value already read instead of indexing the header again,
        # which raised KeyError when the header was absent.
        raise BadInput(
            "BentoML currently doesn't support Content-Type: {content_type} for "
            "AWS Lambda".format(content_type=content_type)
        )

    # b64decode accepts both ASCII str and bytes bodies; decodebytes only
    # accepts bytes-like objects.
    image = self.imread(base64.b64decode(event["body"]), pilmode=self.pilmode)

    result = func(image)
    result = get_output_str(result, event["headers"].get("output", "json"))
    return {"statusCode": 200, "body": result}
def handle_request(self, request, func):
    """Run ``func`` on the JSON body of an HTTP request.

    Args:
        request: Request object exposing ``content_type`` and ``data``
            (bytes, decoded as UTF-8).
        func: Callable invoked with the parsed JSON value.

    Returns:
        A ``Response`` with status 200 and mimetype ``application/json``.

    Raises:
        BadInput: If the content type is not exactly ``application/json``, or
            if the body is not valid UTF-8 encoded JSON.
    """
    if request.content_type != "application/json":
        raise BadInput(
            "Request content-type must be 'application/json' for this "
            "BentoService API"
        )

    try:
        parsed_json = json.loads(request.data.decode("utf-8"))
    except ValueError as e:
        # Covers json.JSONDecodeError and UnicodeDecodeError, both of which
        # are ValueError subclasses; a malformed body is a client error.
        raise BadInput("Request body must contain valid json") from e

    result = func(parsed_json)
    json_output = api_func_result_to_json(result)
    return Response(response=json_output, status=200, mimetype="application/json")
def _load_file(self, request: Request):
    """Return a binary stream for the single file carried by ``request``.

    When the request has uploaded files, exactly one is allowed and its
    ``stream`` is returned. Otherwise the raw request body is wrapped in a
    ``BytesIO``.

    Raises:
        BadInput: If more than one file is uploaded, the uploaded file object
            is falsy, or the request has neither files nor body data.
    """
    uploaded_count = len(request.files)
    if not uploaded_count:
        # No multipart files: fall back to the raw request body.
        raw_body = request.get_data()
        if raw_body:
            return BytesIO(raw_body)
        raise BadInput(
            "BentoML#ImageHandler unexpected HTTP request format")

    if uploaded_count != 1:
        raise BadInput(
            "ImageHandler requires one and at least one file at a time, "
            "if you just upgraded from bentoml 0.7, you may need to use "
            "MultiImageHandler or LegacyImageHandler instead")

    uploaded = next(iter(request.files.values()))
    if not uploaded:
        raise BadInput(
            "BentoML#ImageHandler unexpected HTTP request format")
    return uploaded.stream
def handle_request(self, request, func):
    """Run ``func`` on the JSON body of an HTTP request.

    Args:
        request: Request object exposing ``content_type``, ``data`` (bytes,
            decoded as UTF-8) and ``headers``. An optional ``output`` header
            selects the output format (default ``"json"``).
        func: Callable invoked with the parsed JSON value.

    Returns:
        A ``Response`` with status 200 and mimetype ``application/json``.

    Raises:
        BadInput: If the content type is not exactly ``application/json``, or
            if the body is not valid UTF-8 encoded JSON.
    """
    if request.content_type != "application/json":
        raise BadInput(
            "Request content-type must be 'application/json' for this "
            "BentoService API")

    try:
        parsed_json = json.loads(request.data.decode("utf-8"))
    except ValueError as e:
        # Covers json.JSONDecodeError and UnicodeDecodeError, both of which
        # are ValueError subclasses; a malformed body is a client error.
        raise BadInput("Request body must contain valid json") from e

    result = func(parsed_json)
    result = get_output_str(result, request.headers.get("output", "json"))
    return Response(response=result, status=200, mimetype="application/json")
def list(
    self,
    namespace,
    operator=None,
    labels_query=None,
    offset=None,
    limit=None,
    order_by=ListDeploymentsRequest.created_at,
    ascending_order=False,
):
    """Query deployments with optional filtering, ordering and paging.

    Args:
        namespace: Namespace to filter by; ``ALL_NAMESPACE_TAG`` disables the
            namespace filter.
        operator: Optional ``DeploymentSpec.DeploymentOperator`` value to
            filter by.
        labels_query: Optional comma-separated ``key=value`` pairs. Only the
            first ``=`` of each pair separates key from value.
        offset: Optional number of rows to skip.
        limit: Maximum number of rows; a falsy value falls back to 200.
        order_by: ``ListDeploymentsRequest.SORTABLE_COLUMN`` value naming the
            ``Deployment`` attribute to sort by.
        ascending_order: Sort ascending when true, descending otherwise.

    Returns:
        List of deployments converted with ``_deployment_orm_obj_to_pb``.

    Raises:
        BadInput: If a label in ``labels_query`` contains no ``=``.
    """
    with create_session(self.sess_maker) as sess:
        query = sess.query(Deployment)
        order_by = ListDeploymentsRequest.SORTABLE_COLUMN.Name(order_by)
        order_by_field = getattr(Deployment, order_by)
        order_by_action = (
            order_by_field if ascending_order else desc(order_by_field)
        )
        query = query.order_by(order_by_action)
        if namespace != ALL_NAMESPACE_TAG:  # else query all namespaces
            query = query.filter_by(namespace=namespace)
        if operator:
            operator_name = DeploymentSpec.DeploymentOperator.Name(operator)
            query = query.filter(
                Deployment.spec['operator'].contains(operator_name)
            )
        if labels_query:
            # We only handle key=value query at the moment, the more advanced query
            # such as `in` or `notin` are not handled.
            for label in labels_query.split(','):
                if '=' not in label:
                    raise BadInput(
                        'Invalid label format. Please present query in '
                        'key=value format'
                    )
                # Split on the first '=' only, so a value that itself
                # contains '=' no longer breaks the two-name unpacking.
                label_key, label_value = label.split('=', 1)
                query = query.filter(
                    Deployment.labels[label_key].contains(label_value)
                )

        # We are not defaulting limit to 200 in the signature,
        # because protobuf will pass 0 as value
        limit = limit or 200
        # Limit and offset need to be called after order_by filter/filter_by is
        # called
        query = query.limit(limit)
        if offset:
            query = query.offset(offset)
        query_result = query.all()

        return list(map(_deployment_orm_obj_to_pb, query_result))
def handle_cli(self, args, func):
    """Run the API function against a dataframe supplied on the command line.

    Args:
        args: Argument strings. ``--input`` is required and ``--orient`` is
            optional; unrecognized arguments are forwarded to the output
            adapter.
        func: Callable invoked with the parsed dataframe.

    Raises:
        BadInput: If the input is a file/URL whose name ends in neither
            ``.csv`` nor ``.json``, or if an inline input string cannot be
            parsed as JSON.
    """
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument("--input", required=True)
    cli_parser.add_argument(
        "--orient",
        default=self.orient,
        choices=PANDAS_DATAFRAME_TO_JSON_ORIENT_OPTIONS,
    )
    options, passthrough_args = cli_parser.parse_known_args(args)
    source = options.input

    def _read_json():
        return pd.read_json(source, orient=options.orient, typ=self.typ)

    points_to_location = (
        os.path.isfile(source) or is_s3_url(source) or is_url(source)
    )
    if not points_to_location:
        # Anything that is not a local file, S3 URL or URL is taken to be an
        # inline JSON document.
        try:
            df = _read_json()
        except ValueError as e:
            raise BadInput(
                "Unexpected input format, BentoML DataframeInput expects json "
                "string as input: {}".format(e)
            )
    elif source.endswith(".csv"):
        df = pd.read_csv(source)
    elif source.endswith(".json"):
        df = _read_json()
    else:
        raise BadInput(
            "Input file format not supported, BentoML cli only accepts .json "
            "and .csv file"
        )

    if self.typ == "frame" and self.input_dtypes is not None:
        check_dataframe_column_contains(self.input_dtypes, df)

    self.output_adapter.to_cli(func(df), passthrough_args)
def _load_image_data(self, request: Request):
    """Read the single image carried by ``request`` via ``self.imread``.

    When the request has uploaded files, exactly one is allowed; its
    sanitized file name is checked against ``self.accept_image_formats`` and
    its ``stream`` is read. Otherwise the raw request body is read.

    Raises:
        BadInput: If more than one file is uploaded, the uploaded file object
            is falsy, or the request has neither files nor body data.
    """
    uploaded_count = len(request.files)
    if uploaded_count:
        if uploaded_count != 1:
            raise BadInput(
                "ImageInput requires one and at least one image file at a time, "
                "if you just upgraded from bentoml 0.7, you may need to use "
                "FileInput or LegacyImageInput instead"
            )
        uploaded = next(iter(request.files.values()))
        if not uploaded:
            raise BadInput("BentoML#ImageInput unexpected HTTP request format")
        safe_name = secure_filename(uploaded.filename)
        verify_image_format_or_raise(safe_name, self.accept_image_formats)
        image_source = uploaded.stream
    else:
        # No multipart files: the raw body itself is handed to imread.
        image_source = request.get_data()
        if not image_source:
            raise BadInput("BentoML#ImageInput unexpected HTTP request format")

    return self.imread(image_source, pilmode=self.pilmode)
def handle_request(self, request, func):
    """Handle an HTTP request that carries one or more image files.

    Each image is read with ``self.imread`` and the results are passed to
    ``func`` as positional arguments.

    Args:
        request: incoming request object.
        func: function that takes the decoded image(s) as its argument(s).

    Return:
        response object

    Raises:
        BadInput: if the request has neither matching files nor body data.
    """
    single_image_mode = len(self.input_names) == 1 and len(request.files) == 1
    if single_image_mode:
        # Ignore multipart form input name when LegacyImageHandler is intended
        # to accept only one image file at a time
        uploads = list(request.files.values())
    else:
        uploads = [
            request.files.get(name)
            for name in self.input_names
            if name in request.files
        ]

    if uploads:
        # Sanitize every name first, then validate, then read the contents.
        safe_names = [secure_filename(upload.filename) for upload in uploads]
        for safe_name in safe_names:
            verify_image_format_or_raise(safe_name, self.accept_image_formats)
        sources = [BytesIO(upload.read()) for upload in uploads]
    else:
        raw_body = request.get_data()
        if not raw_body:
            raise BadInput(
                "BentoML#LegacyImageHandler unexpected HTTP request format"
            )
        sources = (raw_body, )

    images = tuple(
        self.imread(source, pilmode=self.pilmode) for source in sources
    )
    body = api_func_result_to_json(func(*images))
    return Response(response=body, status=200, mimetype="application/json")
def handle_request(self, request, func):
    """Handle an HTTP request whose JSON body describes a tensorflow tensor.

    The UTF-8 decoded body is passed to ``self._handle_raw_str`` together
    with ``func``; the result is turned into a response by the output adapter.

    Args:
        request: incoming request object.
        func: function forwarded to ``self._handle_raw_str``.

    Return:
        response object

    Raises:
        BadInput: if the content type is not ``application/json``.
    """
    if request.content_type != "application/json":
        raise BadInput("Request content-type must be 'application/json'"
                       " for this BentoService API")

    raw_text = request.data.decode("utf-8")
    outcome = self._handle_raw_str(raw_text, func)
    return self.output_adapter.to_response(outcome, request)
def handle_aws_lambda_event(self, event, func):
    """Parse an AWS Lambda event body into a dataframe and run ``func`` on it.

    A ``text/csv`` Content-Type is read as CSV; anything else is
    optimistically parsed as JSON using ``self.orient`` and ``self.typ``.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries.
        func: Callable invoked with the parsed dataframe.

    Returns:
        Whatever ``self.output_adapter.to_aws_lambda_event`` returns.

    Raises:
        BadInput: If the non-CSV body cannot be parsed as JSON.
    """
    # Imported locally so this method does not depend on a module-level import.
    from io import StringIO

    if event["headers"].get("Content-Type", None) == "text/csv":
        # The body holds CSV text; read_csv would treat a bare string as a
        # file path, so wrap it in a file-like object.
        df = pd.read_csv(StringIO(event["body"]))
    else:
        # Optimistically assuming Content-Type to be "application/json"
        try:
            df = pd.read_json(event["body"], orient=self.orient, typ=self.typ)
        except ValueError as e:
            raise BadInput(
                "Failed parsing request data, only Content-Type application/json "
                "and text/csv are supported in BentoML DataframeInput"
            ) from e

    if self.typ == "frame" and self.input_dtypes is not None:
        check_dataframe_column_contains(self.input_dtypes, df)

    result = func(df)
    return self.output_adapter.to_aws_lambda_event(result, event)
def handle_request(self, request, func):
    """Handle an HTTP request carrying image file(s) for a fastai model.

    Files are looked up by each name in ``self.input_names``; when none is
    found the raw request body is used as the single image. Every image is
    read, optionally post-processed, converted to a tensor and wrapped before
    being passed to ``func`` as positional arguments.

    Args:
        request: incoming request object.
        func: function that takes the prepared image(s) as its argument(s).

    Return:
        response object

    Raises:
        BadInput: if the request has neither matching files nor body data.
    """

    def _prepare(source):
        # Read -> after_open hook -> tensor -> optional in-place /255 -> wrap.
        image = self.imread(source, pilmode=self.convert_mode)
        if self.after_open:
            image = self.after_open(image)
        image = self.fastai_vision.pil2tensor(image, np.float32)
        if self.div:
            image = image.div_(255)
        if self.cls:
            return self.cls(image)
        return self.fastai_vision.Image(image)

    sources = []
    for input_name in self.input_names:
        upload = request.files.get(input_name)
        if upload is None:
            continue
        safe_name = secure_filename(upload.filename)
        verify_image_format_or_raise(safe_name, self.accept_image_formats)
        sources.append(BytesIO(upload.read()))

    if len(sources) == 0:
        raw_body = request.get_data()
        if not raw_body:
            raise BadInput(
                "BentoML#ImageHandler unexpected HTTP request: %s" % request)
        sources = (raw_body, )

    prepared = []
    for source in sources:
        prepared.append(_prepare(source))

    body = api_func_result_to_json(func(*prepared))
    return Response(response=body, status=200, mimetype="application/json")
def handle_aws_lambda_event(self, event, func):
    """Parse an AWS Lambda event body into a dataframe and run ``func`` on it.

    Args:
        event: Lambda event mapping with ``"headers"`` and ``"body"`` entries.
        func: Callable invoked with the parsed dataframe.

    Returns:
        dict with ``statusCode`` 200 and the JSON result as ``body``.

    Raises:
        BadInput: If the Content-Type header is missing, or is neither
            ``application/json`` nor ``text/csv``.
    """
    # Imported locally so this method does not depend on a module-level import.
    from io import StringIO

    # .get() so that a missing header is reported as BadInput instead of
    # escaping as KeyError.
    content_type = event["headers"].get("Content-Type")
    if content_type == "application/json":
        df = pd.read_json(
            event["body"], orient=self.orient, typ=self.typ, dtype=False
        )
    elif content_type == "text/csv":
        # The body holds CSV text; read_csv would treat a bare string as a
        # file path, so wrap it in a file-like object.
        df = pd.read_csv(StringIO(event["body"]))
    else:
        raise BadInput(
            "Request content-type not supported, only application/json and "
            "text/csv are supported")

    if self.typ == "frame" and self.input_dtypes is not None:
        _check_dataframe_column_contains(self.input_dtypes, df)

    result = func(df)
    result = api_func_result_to_json(
        result, pandas_dataframe_orient=self.output_orient)
    return {"statusCode": 200, "body": result}