Ejemplo n.º 1
    def generate_model(self, CSV_filename, is_raw_data=False):
        '''Train a model from a CSV file and wrap it for Acumos.

        Args:
            CSV_filename: path of the CSV file used for training; it is read a
                second time to derive the input column names and dtypes.
            is_raw_data: forwarded to ``build_model_from_CSV``.

        Returns:
            A ``(Model, Requirements)`` tuple. The model exposes a single
            ``classify`` method backed by the trained model's ``predict``.
        '''
        from acumos.modeling import Model, List, create_namedtuple
        from acumos.session import Requirements
        from os import path
        import sklearn

        print(">> %s:  Loading raw features, training model" % CSV_filename)
        # NOTE(review): the tail of this call was missing from the listing;
        # is_raw_data is the only otherwise-unused parameter -- confirm the
        # keyword name against build_model_from_CSV.
        model = self.build_model_from_CSV(CSV_filename,
                                          is_raw_data=is_raw_data)
        print(">> %s:  Reload features, push to server" % CSV_filename)
        df = pd.read_csv(CSV_filename)[self.features]
        # Pair every column name with its numpy scalar type.  Iterating the
        # dtypes Series avoids integer-position lookups on a label index.
        listVars = [(name, dtype.type)
                    for name, dtype in zip(df.columns, df.dtypes)]
        VmPredictorDataFrame = create_namedtuple('VmPredictorDataFrame',
                                                 listVars)

        def predict_metric(df: VmPredictorDataFrame) -> List[float]:
            '''Returns an array of float predictions'''
            X = np.column_stack(df)
            return model.predict(X)

        # compute path of this package to add it as a dependency
        package_path = path.dirname(path.realpath(__file__))
        # NOTE(review): matplotlib is not imported in this function; it is
        # presumably a module-level import -- verify.
        return Model(classify=predict_metric), Requirements(
            packages=[package_path], reqs=[matplotlib, sklearn, np, pd])
def model_create_pipeline(formatter, clf):
    '''Wrap ``formatter.predict`` as an Acumos model.

    Args:
        formatter: object exposing ``output_types_`` (an iterable of
            field-name -> type mappings) and ``predict(DataFrame)``.
        clf: not used by this function.

    Returns:
        A ``(Model, Requirements)`` tuple; the model exposes one ``classify``
        method that takes and returns an ``ImageTagSet``.
    '''
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path

    # NOTE(review): MODEL_NAME and VERSION are not defined in this function;
    # presumably they come from the package's _version module -- verify.

    # Flatten the declared output types into (field name, type) pairs.
    tag_type = [(k, item[k]) for item in formatter.output_types_ for k in item]
    name_in = "ImageTag"
    ImageTag = create_namedtuple(name_in, tag_type)
    name_multiple_in = name_in + "s"
    ImageTagSet = create_namedtuple(name_in + "Set",
                                    [(name_multiple_in, List[ImageTag])])

    def predict_class(val_wrapped: ImageTagSet) -> ImageTagSet:
        '''Run the formatter on a set of image tags and return the new set'''
        df = pd.DataFrame(getattr(val_wrapped, name_multiple_in),
                          columns=ImageTag._fields)
        tags_df = formatter.predict(df)
        tags_parts = tags_df.to_dict('split')
        tags_list = [ImageTag(*r) for r in tags_parts['data']]
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".
              format("classify", MODEL_NAME, VERSION, len(df), ImageTagSet,
                     len(tags_df), ImageTagSet))
        return ImageTagSet(tags_list)

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_class), Requirements(packages=[package_path],
                                                       reqs=[pd, np, sklearn])
Ejemplo n.º 3
def model_create_pipeline(formatter, clf):
    '''Wrap ``formatter.predict`` as an Acumos model.

    Builds an ``ImageTag`` record type from ``formatter.output_types_`` and an
    ``ImageTagSet`` type holding a list of those records, then exposes a
    ``classify`` method that converts an incoming set to a DataFrame, runs the
    formatter and converts the result back.

    Args:
        formatter: object exposing ``output_types_`` and ``predict``.
        clf: not used by this function.

    Returns:
        A ``(Model, Requirements)`` tuple.
    '''
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    from image_mood_classifier._version import MODEL_NAME, __version__ as VERSION

    # TODO: replace with a friendlier dataframe operation once it supports strings
    tag_fields = [(key, spec[key])
                  for spec in formatter.output_types_
                  for key in spec]
    single_name = "ImageTag"
    ImageTag = create_namedtuple(single_name, tag_fields)
    plural_name = single_name + "s"
    ImageTagSet = create_namedtuple(single_name + "Set", [(plural_name, List[ImageTag])])

    def predict_class(val_wrapped: ImageTagSet) -> ImageTagSet:
        '''Run the formatter on a set of image tags and return the new set'''
        rows_in = getattr(val_wrapped, plural_name)
        frame_in = pd.DataFrame(rows_in, columns=ImageTag._fields)
        frame_out = formatter.predict(frame_in)
        # 'split' orientation yields the row values under the 'data' key
        tags_out = [ImageTag(*row) for row in frame_out.to_dict('split')['data']]
        template = "[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))"
        print(template.format("classify", MODEL_NAME, VERSION, len(frame_in),
                              ImageTagSet, len(frame_out), ImageTagSet))
        return ImageTagSet(tags_out)

    # this package's own directory is shipped as a dependency
    package_path = path.dirname(path.realpath(__file__))
    wrapped_model = Model(classify=predict_class)
    requirements = Requirements(packages=[package_path], reqs=[pd, np, sklearn])
    return wrapped_model, requirements
Ejemplo n.º 4
    def generate_model(self, file_list, data_out=None):
        '''Train a time-slice model from ``file_list`` and wrap it for Acumos.

        Returns a ``(Model, Requirements)`` tuple whose ``classify`` method
        runs ``xmodel.predict`` on the submitted frames and returns the
        predictions as a list.  ``data_out`` is not referenced in the visible
        body.

        NOTE(review): two statements in this listing end on an open
        parenthesis (the ``train_timeslice_model`` call and the first
        ``create_namedtuple`` call), so their continuation lines appear to be
        missing; the function will not parse until they are restored.
        '''
        from acumos.modeling import Model, List, create_namedtuple
        from acumos.session import Requirements
        from os import path
        import sklearn

        # Note:  all files in the list will be appended
        master_df, VM_list = self.preprocess_files(file_list)
        train_start, range_end = self.find_time_range(
            master_df)  # TBD:  allow user to specify start/stop dates
        train_stop = train_start + self.train_interval
        xmodel, train_data = self.train_timeslice_model(master_df,
        # NOTE(review): the remaining arguments and the closing parenthesis of
        # the call above are missing from this listing.

        df = train_data[self.features]
        listColumns = list(df.columns)
        # Pair each feature column with the Python type of its first-row value.
        # NOTE(review): DataFrame.ix was removed in pandas 1.0 -- confirm the
        # pandas version this code targets.
        listVars = [(df.columns[i], type(df.ix[0][i]))
                    for i in range(len(listColumns))]
        VmPredictorDataFrame = create_namedtuple('VmPredictorDataFrame',
        # NOTE(review): the field-list argument (presumably listVars) and the
        # closing parenthesis are missing here; the call below also appears to
        # lack its type-name argument.
        VmPredictorDataFrameSet = create_namedtuple(
            [('frames', List[VmPredictorDataFrame])])
        type_out = List[float]

        def predict_metric(val_wrapped: VmPredictorDataFrameSet) -> type_out:
            '''Returns a list of predictions, one per submitted frame'''
            # Rebuild a DataFrame with the training column order.
            df = pd.DataFrame(val_wrapped.frames, columns=listColumns)
            # df = pd.DataFrame(np.column_stack(val_wrapped), columns=val_wrapped._fields)  # numpy doesn't like binary
            predict_nd = xmodel.predict(df)  # return here is a nd-array
            predict_list = predict_nd.tolist()  # flatten to tag set
            # predict_list = type_out(list(predict_nd))  # flatten to tag set
            return predict_list

        # compute path of this package to add it as a dependency
        package_path = path.dirname(path.realpath(__file__))
        return Model(classify=predict_metric), Requirements(
            packages=[package_path], reqs=[sklearn, np, pd])
Ejemplo n.º 5
def model_create_pipeline(path_model, path_label, top_n):
    '''Build a decode/predict/format pipeline and wrap it for Acumos.

    Args:
        path_model: passed to ``Predictor`` as ``path_model``.
        path_label: path of a file holding the class dictionary as a Python
            expression; when falsy, ``None`` is handed to ``Formatter``.
        top_n: passed to ``Formatter`` together with the class dictionary.

    Returns:
        A ``(Model, Requirements)`` tuple; the model exposes one ``classify``
        method that takes a single ``Image`` and returns an ``ImageTagSet``.
    '''
    from sklearn.pipeline import Pipeline
    import keras
    from image_classifier.keras_model.prediction_formatter import Formatter
    from image_classifier.keras_model.evaluate_image import Predictor
    from image_classifier.keras_model.image_decoder import ImageDecoder
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    from _version import __version__

    # read dictionary to pass along to formatter class
    # SECURITY NOTE: eval() executes whatever the label file contains; only
    # point path_label at trusted files (ast.literal_eval would be safer if
    # the file is a plain literal).
    dict_classes = None
    if path_label:
        with open(path_label, 'r') as label_file:
            dict_classes = eval(label_file.read())

    # we will create a hybrid keras/scikit pipeline because we need some preprocessing done
    #   within scikit that is not easily possible with keras
    # stages are as follows (the quoted section is the scikit pipeline name)
    #   #1 'decode' - input+reshape - decode incoming image with MIME+BINARY as inputs
    #   #2 'predict' - prediction - input the transformed image to the prediction method
    #   #3 'format' - predict transform - post-process the predictions into sorted prediction classes
    # see this page for hints about what happens...
    #   https://stackoverflow.com/questions/37984304/how-to-save-a-scikit-learn-pipline-with-keras-regressor-inside-to-disk
    # NOTE: the last object is an "estimator" type so that we can call "predict", as required by the
    #       acumos-based wrapper functionality
    pipeline = Pipeline([
        ('decode', ImageDecoder()),
        ('predict', Predictor(path_model=path_model)),
        ('format', Formatter(dict_classes, top_n)),
    ])

    # input of classifier, a single image record
    # TODO: replace with more friendly dataframe operation when it supports strings...
    df = ImageDecoder.generate_input_dataframe()
    image_type = tuple(zip(df.columns, ImageDecoder.generate_input_types()))
    name_in = "Image"
    input_image = create_namedtuple(name_in, image_type)

    # output of classifier, list of tags
    df = Formatter.generate_output_dataframe()
    tag_result = tuple(zip(df.columns, Formatter.generate_output_types()))
    name_out = "ImageTag"
    ImageTag = create_namedtuple(name_out, tag_result)
    output_set = create_namedtuple(name_out + "Set", [(name_out + "s", List[ImageTag])])

    def predict_class(val_wrapped: input_image) -> output_set:
        '''Classify one image and return its tags as an ImageTagSet'''
        df = pd.DataFrame([val_wrapped], columns=input_image._fields)
        tags_df = pipeline.predict(df)
        tags_parts = tags_df.to_dict('split')
        tags_list = [ImageTag(*r) for r in tags_parts['data']]
        # NOTE(review): MODEL_NAME is not imported in this function; it is
        # presumably defined at module level -- verify.
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".format(
              "classify", MODEL_NAME, __version__, len(df), input_image, len(tags_df), output_set))
        return output_set(tags_list)

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_class), Requirements(packages=[package_path], reqs=[pd, np, keras, 'tensorflow', 'Pillow'])
Ejemplo n.º 6
def test_wrapped_prim_type():
    '''Tests model wrap and load functionality for primitive-typed functions.

    Each ``fN`` is paired with an ``fN_in`` argument tuple and an ``fN_out``
    expected-result tuple, and the triple is handed to ``_generic_test``.
    '''
    def f1(x: int, y: int) -> int:
        return x + y

    def f2(x: int, y: int) -> None:
        pass

    def f3() -> None:
        pass

    def f4() -> int:
        return 3330

    def f5(data: bytes) -> str:
        '''Something more complex'''
        buffer = io.BytesIO(data)
        img = PIL.Image.open(buffer)
        return img.format

    def f6(x: List[int]) -> int:
        return sum(x)

    def f7(x: List[str]) -> Dict[str, int]:
        return Counter(x)

    def f8(x: List[np.int32]) -> np.int32:
        return np.sum(x)

    # input / output "answers"
    f1_in = (1, 2)
    f1_out = (3, )

    f2_in = (1, 2)
    f2_out = ()

    f3_in = ()
    f3_out = ()

    # f4 takes no arguments, so its input tuple is empty (as for f3)
    f4_in = ()
    f4_out = (3330, )

    with open(_IMG_PATH, 'rb') as f:
        f5_in = (f.read(), )
        f5_out = ('PNG', )

    f6_in = ([1, 2, 3], )
    f6_out = (6, )

    f7_in = (['a', 'a', 'b'], )
    f7_out = ({'a': 2, 'b': 1}, )

    f8_in = ([1, 2, 3], )
    f8_out = (6, )

    for func, in_, out in ((f1, f1_in, f1_out), (f2, f2_in, f2_out),
                           (f3, f3_in, f3_out), (f4, f4_in, f4_out),
                           (f6, f6_in, f6_out), (f8, f8_in, f8_out)):
        _generic_test(func, in_, out)

    # f5 needs an explicit import-name -> package-name mapping for PIL
    _generic_test(f5, f5_in, f5_out,
                  reqs=Requirements(req_map={'PIL': 'pillow'}))
    _generic_test(f7, f7_in, f7_out, skip=_dict_skips)
Ejemplo n.º 7
def model_create_pipeline(transformer, funcName, inputIsSet, outputIsSet):
    '''Wrap ``transformer.predict`` as a single-method Acumos model.

    Args:
        transformer: object exposing ``_type_in`` and ``_type_out`` (each a
            ``(field_list, type_name)`` pair) and ``predict(DataFrame)``.
        funcName: name under which the wrapped method is declared on the model.
        inputIsSet: when true, the method takes a ``<Type>Set`` holding a list
            of input records instead of a single record.
        outputIsSet: when true, the method returns a ``<Type>Set`` holding a
            list of output records instead of a single record.

    Returns:
        A ``(Model, Requirements)`` tuple.
    '''
    from acumos.session import Requirements
    from acumos.modeling import Model, List, create_namedtuple
    from face_privacy_filter._version import MODEL_NAME, __version__ as VERSION
    import sklearn
    import cv2
    from os import path

    # derive the input type from the transformer
    input_type, type_name = transformer._type_in  # it looked like this [('test', int), ('tag', str)]
    type_in = create_namedtuple(type_name, input_type)
    input_set = type_in
    name_multiple_in = type_name
    if inputIsSet:
        name_multiple_in = type_name + "s"
        input_set = create_namedtuple(type_name + "Set",
                                      [(name_multiple_in, List[type_in])])

    # derive the output type from the transformer
    output_type, type_name = transformer._type_out
    type_out = create_namedtuple(type_name, output_type)
    output_set = type_out
    if outputIsSet:
        name_multiple_out = type_name + "s"
        output_set = create_namedtuple(type_name + "Set",
                                       [(name_multiple_out, List[type_out])])

    def transform(val_wrapped: input_set) -> output_set:
        '''Transform from image or detection and return score or image'''
        if inputIsSet:
            # unwrap the list of records carried by the set type
            df = pd.DataFrame(getattr(val_wrapped, name_multiple_in),
                              columns=type_in._fields)
        else:
            df = pd.DataFrame([val_wrapped], columns=type_in._fields)
        result_df = transformer.predict(df)
        result_parts = result_df.to_dict('split')
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".
              format("classify", MODEL_NAME, VERSION, len(df), type_in,
                     len(result_df), output_set))
        # an empty input yields an empty list rather than an output record
        output_obj = []
        if len(df):
            if outputIsSet:
                output_obj = output_set(
                    [type_out(*r) for r in result_parts['data']])
            else:
                # single-record output: only the first result row is used
                output_obj = output_set(*result_parts['data'][0])
        return output_obj

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    objModelDeclare = {}
    objModelDeclare[funcName] = transform
    # add the model dependency manually because of the way we constructed the package;
    # the opencv-python/cv2 dependency is not picked up automagically
    return Model(**objModelDeclare), Requirements(
        reqs=[pd, np, sklearn, 'opencv-python'],
        req_map={cv2: 'opencv-python'})