def test_wrapped_nested_type():
    '''Checks that nested NamedTuple messages are unpacked correctly'''
    Inner = create_namedtuple('Inner', [('x', int), ('y', int), ('z', int)])
    N1 = create_namedtuple('N1', [('dict_data', Dict[str, int])])
    N2 = create_namedtuple('N2', [('n1s', List[N1])])

    def f1(x: List[Inner]) -> Inner:
        '''Adds a sequence of Inner tuples together, field by field'''
        stacked = np.vstack(x)
        return Inner(*stacked.sum(axis=0))

    def f2(n2_in: N2) -> N2:
        '''Builds a new N2 out of the first N1 found in the input N2'''
        first = n2_in.n1s[0]
        merged = dict(**first.dict_data)  # shallow copy, the input dict is left untouched
        merged['b'] = 2
        n1_new = N1(dict_data=merged)
        return N2(n1s=[n1_new, n1_new])

    # f1: five identical rows summed component-wise
    f1_in = ([Inner(1, 2, 3)] * 5, )
    f1_out = (5, 10, 15)

    # f2: the single input N1 gains a 'b' key and is emitted twice
    expected_n1 = N1(dict_data={'a': 1, 'b': 2})
    f2_in = N2(n1s=[N1(dict_data={'a': 1})])
    f2_out = N2(n1s=[expected_n1, expected_n1])

    _generic_test(f1, f1_in, f1_out)
    _generic_test(f2, f2_in, f2_out, skip=_dict_skips)
def model_create_pipeline(formatter, clf):
    # attach the classifier to the formatter before deriving any types
    formatter.set_params(classifier=clf)

    # flatten every entry of output_types_ into (field name, field type) pairs
    tag_type = [(key, spec[key]) for spec in formatter.output_types_ for key in spec]

    name_in = "ImageTag"
    name_multiple_in = name_in + "s"
    ImageTag = create_namedtuple(name_in, tag_type)
    ImageTagSet = create_namedtuple(name_in + "Set", [(name_multiple_in, List[ImageTag])])

    def predict_class(val_wrapped: ImageTagSet) -> ImageTagSet:
        # unpack the wrapped rows into a frame, predict, and re-wrap the result rows
        rows_in = getattr(val_wrapped, name_multiple_in)
        df = pd.DataFrame(rows_in, columns=ImageTag._fields)
        tags_df = formatter.predict(df)
        split_result = tags_df.to_dict('split')
        tags_list = [ImageTag(*row) for row in split_result['data']]
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".format(
            "classify", MODEL_NAME, VERSION, len(df), ImageTagSet, len(tags_df), ImageTagSet))
        return ImageTagSet(tags_list)

    # the directory holding this file is shipped as a package dependency
    package_path = path.dirname(path.realpath(__file__))
    model = Model(classify=predict_class)
    requirements = Requirements(packages=[package_path], reqs=[pd, np, sklearn])
    return model, requirements
def model_create_pipeline(formatter, clf):
    '''Builds an Acumos model and its requirements around a formatter and classifier.

    The classifier is attached to `formatter` through `set_params`, and the
    message types are derived from `formatter.output_types_`.

    Returns a `(Model, Requirements)` pair; the model exposes a single
    `classify` method that feeds the wrapped rows to `formatter.predict`.
    '''
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    from image_mood_classifier._version import MODEL_NAME, __version__ as VERSION

    def _debug(label, value):
        # print() does not apply %-style formatting to its arguments, so the
        # value has to be interpolated explicitly
        print("{}:{}".format(label, value))

    # add classifier
    formatter.set_params(classifier=clf)

    # create a dataframe and image set
    # TODO: replace with more friendly dataframe operation when it supports strings...
    tag_type = []
    _debug("=================formatter.output_types_", formatter.output_types_)
    for item in formatter.output_types_:
        _debug("++++++++++++++++++++++item", item)
        for k in item:
            _debug("======================k", k)
            tag_type.append((k, item[k]))
    _debug("=================tag_type", tag_type)

    name_in = "ImageTag"
    ImageTag = create_namedtuple(name_in, tag_type)
    _debug("==========================ImageTag", ImageTag)
    name_multiple_in = name_in + "s"
    _debug("==========================name_multiple_in", name_multiple_in)
    ImageTagSet = create_namedtuple(name_in + "Set", [(name_multiple_in, List[ImageTag])])
    _debug("========================ImageTagSet", ImageTagSet)
    _debug("=======================ImageTag._fields", ImageTag._fields)

    def predict_class(val_wrapped: ImageTagSet) -> ImageTagSet:
        '''Runs the formatter on a set of image tags and returns the resulting tag set'''
        df = pd.DataFrame(getattr(val_wrapped, name_multiple_in), columns=ImageTag._fields)
        tags_df = formatter.predict(df)
        # 'split' orientation exposes the result rows as plain lists under 'data'
        tags_parts = tags_df.to_dict('split')
        tags_list = [ImageTag(*r) for r in tags_parts['data']]
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".format(
            "classify", MODEL_NAME, VERSION, len(df), ImageTagSet, len(tags_df), ImageTagSet))
        return ImageTagSet(tags_list)

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_class), Requirements(packages=[package_path], reqs=[pd, np, sklearn])
def generate_model(self, CSV_filename, is_raw_data=False):
    '''Trains a model from a CSV file and wraps it as an Acumos model.

    Args:
        CSV_filename: path of the CSV file used for training and for deriving
            the input column types
        is_raw_data: forwarded unchanged to `build_model_from_CSV`

    Returns:
        A `(Model, Requirements)` pair; the model exposes one `classify` method.
    '''
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    import sklearn

    print(">> %s: Loading raw features, training model" % CSV_filename)
    model = self.build_model_from_CSV(CSV_filename, is_raw_data=is_raw_data)

    print(">> %s: Reload features, push to server" % CSV_filename)
    df = pd.read_csv(CSV_filename)[self.features]
    # Pair each column with its numpy scalar type. Walking columns and dtypes in
    # lockstep avoids integer lookups on the label-indexed dtypes Series, which
    # pandas treats as a deprecated positional fallback.
    listVars = [(column, dtype.type) for column, dtype in zip(df.columns, df.dtypes)]
    VmPredictorDataFrame = create_namedtuple('VmPredictorDataFrame', listVars)

    def predict_metric(df: VmPredictorDataFrame) -> List[float]:
        '''Returns an array of float predictions'''
        # one column per NamedTuple field
        X = np.column_stack(df)
        return model.predict(X)

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_metric), Requirements(
        packages=[package_path], reqs=[matplotlib, sklearn, np, pd])
def test_session_push_sklearn():
    '''Pushes a scikit-learn iris classifier through a session against the mock server'''
    clear_jwt()

    with _patch_auth(), MockServer() as server:
        iris = load_iris()
        X, y = iris.data, iris.target

        clf = RandomForestClassifier(random_state=0)
        clf.fit(X, y)

        columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
        X_df = pd.DataFrame(X, columns=columns)

        DataFrame = create_dataframe('DataFrame', X_df)
        Predictions = create_namedtuple('Predictions', [('predictions', List[int])])

        def predict(df: DataFrame) -> Predictions:
            '''Classifies every iris sample held in the frame'''
            features = np.column_stack(df)
            return Predictions(predictions=clf.predict(features))

        model = Model(predict=predict)

        model_url, auth_url, _, _ = server.config
        session = AcumosSession(model_url, auth_url)
        session.push(model, name='sklearn_iris_push')
def generate_model(self, file_list, data_out=None):
    '''Trains a time-slice model from the given files and wraps it as an Acumos model.

    Args:
        file_list: input files; all files in the list are appended together by
            `preprocess_files`
        data_out: forwarded to `train_timeslice_model` as `featfile`

    Returns:
        A `(Model, Requirements)` pair; the model exposes one `classify` method.
    '''
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    import sklearn

    # Note: all files in the list will be appended
    master_df, VM_list = self.preprocess_files(file_list)
    # TBD: allow user to specify start/stop dates
    train_start, _range_end = self.find_time_range(master_df)
    train_stop = train_start + self.train_interval
    xmodel, train_data = self.train_timeslice_model(
        master_df, VM_list, train_start, train_stop, featfile=data_out)

    df = train_data[self.features]
    listColumns = list(df.columns)
    # `DataFrame.ix` was removed in pandas 1.0. Sample the first row positionally
    # and read each cell with `iloc`, which keeps the numpy scalar types.
    # NOTE(review): `.ix[0]` looked up label 0 on integer indexes - confirm the
    # first row is the intended sample.
    first_row = df.iloc[0]
    listVars = [(name, type(first_row.iloc[pos])) for pos, name in enumerate(listColumns)]
    VmPredictorDataFrame = create_namedtuple('VmPredictorDataFrame', listVars)
    VmPredictorDataFrameSet = create_namedtuple(
        'VmPredictorDataFrameSet', [('frames', List[VmPredictorDataFrame])])
    type_out = List[float]

    def predict_metric(val_wrapped: VmPredictorDataFrameSet) -> type_out:
        '''Returns a list of float predictions, one per input frame'''
        df = pd.DataFrame(val_wrapped.frames, columns=listColumns)
        predict_nd = xmodel.predict(df)  # return here is a nd-array
        return predict_nd.tolist()

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_metric), Requirements(
        packages=[package_path], reqs=[sklearn, np, pd])
def test_session_push_keras():
    '''Pushes a small keras iris classifier through a session against the mock server'''
    clear_jwt()

    with _patch_auth(), MockServer() as server:
        iris = load_iris()
        X = iris.data
        y = pd.get_dummies(iris.target).values  # one-hot encoded targets

        clf = Sequential()
        clf.add(Dense(3, input_dim=4, activation='relu'))
        clf.add(Dense(3, activation='softmax'))
        clf.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])
        clf.fit(X, y)

        columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
        X_df = pd.DataFrame(X, columns=columns)

        DataFrame = create_dataframe('DataFrame', X_df)
        Predictions = create_namedtuple('Predictions', [('predictions', List[int])])

        def predict(df: DataFrame) -> Predictions:
            '''Classifies every iris sample held in the frame'''
            features = np.column_stack(df)
            return Predictions(predictions=clf.predict(features))

        model = Model(predict=predict)

        model_url, auth_url, _, _ = server.config
        session = AcumosSession(model_url, auth_url)
        session.push(model, name='keras_iris_push')
def model_create_pipeline(path_model, path_label, top_n):
    '''Builds an Acumos model around a hybrid keras/scikit-learn image pipeline.

    Args:
        path_model: forwarded to `Predictor` as `path_model`
        path_label: optional path of a file holding the class dictionary as a
            Python expression; when falsy, `None` is handed to the formatter
        top_n: forwarded to `Formatter` together with the class dictionary

    Returns:
        A `(Model, Requirements)` pair; the model exposes one `classify` method.
    '''
    from sklearn.pipeline import Pipeline
    import keras
    from image_classifier.keras_model.prediction_formatter import Formatter
    from image_classifier.keras_model.evaluate_image import Predictor
    from image_classifier.keras_model.image_decoder import ImageDecoder
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    from _version import __version__

    # read dictionary to pass along to formatter class
    dict_classes = None
    if path_label:
        # the context manager closes the label file even if evaluation fails
        with open(path_label, 'r') as label_file:
            # NOTE(review): eval() executes whatever the label file contains; only
            # use trusted files, or switch to ast.literal_eval if the file is
            # always a plain literal.
            dict_classes = eval(label_file.read())

    # we will create a hybrid keras/scikit pipeline because we need some preprocessing done
    # within scikit that is not easily possible with keras
    #
    # stages are as follows (the quoted section is the scikit pipeline name)
    #   #1 'decode'  - input+reshape - decode incoming image with MIME+BINARY as inputs
    #   #2 'predict' - prediction - input the transformed image to the prediction method
    #   #3 'format'  - predict transform - post-process the predictions into sorted prediction classes
    # see this page for hints about what happens...
    #   https://stackoverflow.com/questions/37984304/how-to-save-a-scikit-learn-pipline-with-keras-regressor-inside-to-disk
    #
    # NOTE: the last object is an "estimator" type so that we can call "predict", as required by the
    #       acumos-based wrapper functionality
    pipeline = Pipeline([
        ('decode', ImageDecoder()),
        ('predict', Predictor(path_model=path_model)),
        ('format', Formatter(dict_classes, top_n))
    ])

    # create a dataframe and image set
    # TODO: replace with more friendly dataframe operation when it supports strings...
    df = ImageDecoder.generate_input_dataframe()
    image_type = tuple(zip(df.columns, ImageDecoder.generate_input_types()))
    name_in = "Image"
    input_image = create_namedtuple(name_in, image_type)

    # output of classifier, list of tags
    df = Formatter.generate_output_dataframe()
    tag_result = tuple(zip(df.columns, Formatter.generate_output_types()))
    name_out = "ImageTag"
    ImageTag = create_namedtuple(name_out, tag_result)
    output_set = create_namedtuple(name_out + "Set", [(name_out + "s", List[ImageTag])])

    def predict_class(val_wrapped: input_image) -> output_set:
        '''Runs the pipeline on a single image and returns the resulting set of tags'''
        # a single wrapped image becomes a one-row frame
        df = pd.DataFrame([val_wrapped], columns=input_image._fields)
        tags_df = pipeline.predict(df)
        # 'split' orientation exposes the result rows as plain lists under 'data'
        tags_parts = tags_df.to_dict('split')
        tags_list = [ImageTag(*r) for r in tags_parts['data']]
        # NOTE(review): MODEL_NAME is not imported in this function; it has to be
        # available at module scope.
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".format(
            "classify", MODEL_NAME, __version__, len(df), input_image, len(tags_df), output_set))
        return output_set(tags_list)

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_class), Requirements(
        packages=[package_path], reqs=[pd, np, keras, 'tensorflow', 'Pillow'])
def _load_namedtuple(name, field_types):
    '''Recreates a NamedTuple type from its name and a field name -> type mapping.

    Workaround for dill NamedTuple serialization bug.
    '''
    fields = list(field_types.items())
    return create_namedtuple(name, fields)
from acumos.session import AcumosSession


if __name__ == '__main__':
    # Main: train an iris classifier and dump it as an Acumos model
    iris = load_iris()
    X, y = iris.data, iris.target

    clf = RandomForestClassifier(random_state=0)
    clf.fit(X, y)

    columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
    X_df = pd.DataFrame(X, columns=columns)

    DataFrame = create_dataframe('DataFrame', X_df)
    Predictions = create_namedtuple('Predictions', [('predictions', List[int])])

    def predict(df: DataFrame) -> Predictions:
        '''Classifies every iris sample held in the frame'''
        features = np.column_stack(df)
        return Predictions(predictions=clf.predict(features))

    model = Model(transform=predict)

    s = AcumosSession(None)
    s.dump(model, 'model', '.')
print(train_cols) np.set_printoptions(precision=2) #2 decimal places np.set_printoptions(suppress=True) #remove scientific notation redfin = RedfinAcumosModel() model_cols = redfin.get_formatted_test_cols(train) print(model_cols) # items is the model_cols list from the previous slide items = [('baths', float), ('beds', float), ('square_feet', float), ('property_type', str), ('year_built', float), ('lot_size', float), ('hoa_per_month', float), ('days_on_market', float), ('location', str), ('state', str), ('city', str)] HouseDataFrame = create_namedtuple('HouseDataFrame', items) # here, an appropriate NamedTuple type is inferred from a pandas DataFrame # HouseDataFrame = create_dataframe('HouseDataFrame', X_df) print(HouseDataFrame.__doc__) df = redfin.process_data(train, True) redfin.df = df redfin.df.info() def predict(data): test_df = redfin.process_data(data) # Train by merging locations/columns found in the train dataframe. test_df = redfin.match_train(test_df) redfin.train_model()
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============LICENSE_END=========================================================
"""
Provides an image Acumos model example

This model returns metadata about input images
"""
import io

import PIL
# `import PIL` alone does not load the Image submodule; import it explicitly so
# `PIL.Image.open` does not depend on some other module having loaded it first
import PIL.Image

from acumos.modeling import Model, create_namedtuple
from acumos.session import AcumosSession


ImageShape = create_namedtuple('ImageShape', [('width', int), ('height', int)])


def get_format(data: bytes) -> str:
    '''Returns the format of an image given its encoded bytes'''
    # the context manager releases the image once the header has been read
    with PIL.Image.open(io.BytesIO(data)) as img:
        return img.format


def get_shape(data: bytes) -> ImageShape:
    '''Returns the width and height of an image given its encoded bytes'''
    with PIL.Image.open(io.BytesIO(data)) as img:
        return ImageShape(width=img.width, height=img.height)
def model_create_pipeline(transformer, funcName, inputIsSet, outputIsSet):
    '''Wraps `transformer.predict` as a single-method Acumos model named `funcName`'''
    from acumos.session import Requirements
    from acumos.modeling import Model, List, create_namedtuple
    from face_privacy_filter._version import MODEL_NAME, __version__ as VERSION
    import sklearn
    import cv2
    from os import path

    # input type comes from the transformer as (fields, name);
    # the fields look like [('test', int), ('tag', str)]
    in_fields, in_name = transformer._type_in
    type_in = create_namedtuple(in_name, in_fields)
    if inputIsSet:
        name_multiple_in = in_name + "s"
        input_set = create_namedtuple(in_name + "Set", [(name_multiple_in, List[type_in])])
    else:
        name_multiple_in = in_name
        input_set = type_in

    # output type is declared by the transformer the same way
    out_fields, out_name = transformer._type_out
    type_out = create_namedtuple(out_name, out_fields)
    if outputIsSet:
        output_set = create_namedtuple(out_name + "Set", [(out_name + "s", List[type_out])])
    else:
        output_set = type_out

    def transform(val_wrapped: input_set) -> output_set:
        '''Runs the transformer on one message and packs the result into the output type'''
        # a set carries its rows in a list field; a single item is itself the only row
        rows_in = getattr(val_wrapped, name_multiple_in) if inputIsSet else [val_wrapped]
        df = pd.DataFrame(rows_in, columns=type_in._fields)

        result_df = transformer.predict(df)
        result_parts = result_df.to_dict('split')
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".format(
            "classify", MODEL_NAME, VERSION, len(df), type_in, len(result_df), output_set))

        if not len(df):
            # no input rows: an empty list is returned instead of an output message
            return []
        if outputIsSet:
            return output_set([type_out(*row) for row in result_parts['data']])
        return output_set(*result_parts['data'][0])

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))

    # add the model dependency manually because of the way we constructed the package;
    # the opencv-python/cv2 dependency is not picked up automagically
    model = Model(**{funcName: transform})
    requirements = Requirements(
        packages=[package_path],
        reqs=[pd, np, sklearn, 'opencv-python'],
        req_map={cv2: 'opencv-python'})
    return model, requirements
# Upper half of the faces
X_train = train[:, :(n_pixels + 1) // 2]
# Lower half of the faces
y_train = train[:, n_pixels // 2:]
X_test = test[:, :(n_pixels + 1) // 2]
y_test = test[:, n_pixels // 2:]

knn = KNeighborsRegressor()
knn.fit(X_train, y_train)

# =============================================================================
# Acumos specific code
# =============================================================================

# represents a single "flattened" [1 x n] image array
FlatImage = create_namedtuple('FlatImage', [('image', List[float])])

# represents a collection of flattened image arrays
FlatImages = create_namedtuple('FlatImages', [('images', List[FlatImage])])


def complete_faces(images: FlatImages) -> FlatImages:
    '''Predicts the bottom half of each input image'''
    # Build the [m x n] matrix (m images, n pixels) explicitly. Stacking and then
    # squeezing every unit axis would also drop the image axis when m == 1,
    # leaving a 1-D array that the regressor cannot predict on.
    X = np.array([flat.image for flat in images.images])
    yhat = knn.predict(X)
    return FlatImages([FlatImage(row) for row in yhat])


model = Model(complete_faces=complete_faces)
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from acumos.modeling import Model, List, create_namedtuple
from acumos.session import AcumosSession

# fit a random forest on the complete iris dataset
iris = load_iris()
X, y = iris.data, iris.target

clf = RandomForestClassifier(random_state=0)
clf.fit(X, y)

# every iris measurement is carried as its own List[float] column
IrisDataFrame = create_namedtuple(
    'IrisDataFrame',
    [(column, List[float])
     for column in ('sepal_length', 'sepal_width', 'petal_length', 'petal_width')])

# =============================================================================
# # starting in Python 3.6, you can alternatively use this simpler syntax:
#
# from acumos.modeling import NamedTuple
#
# class IrisDataFrame(NamedTuple):
#     '''DataFrame corresponding to the Iris dataset'''
#     sepal_length: List[float]
#     sepal_width: List[float]
#     petal_length: List[float]
#     petal_width: List[float]
# =============================================================================