Example #1
def run_pipeline(events, models):

    # Map the raw "name" feature to a zero-based integer id to serve
    # as the classification target.
    tNameId = bt.Feature_id_transform(min_size=0,
                                      exclude_missing=True,
                                      zero_based=True,
                                      input_feature="name",
                                      output_feature="nameId")
    # Auto-convert the remaining features to model-ready types.
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2,
                                 exclude=["nameId", "name"])
    xgb = xg.XGBoostClassifier(target="nameId",
                               target_readable="name",
                               excluded=["name"],
                               learning_rate=0.1,
                               silent=1)
    # 5-fold cross validation around the classifier; scores are only
    # populated during fit, so they are logged after fit_transform.
    cv = cf.Seldon_KFold(xgb, 5)

    transformers = [("tName", tNameId), ("tAuto", tAuto), ("cv", cv)]
    p = Pipeline(transformers)

    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe_from_files(events)
    df2 = p.fit_transform(df)
    pw.save_pipeline(p, models)
    logger.info("cross validation scores %s", cv.get_scores())
Example #2
def run_pipeline(events, models):

    tNameId = bt.Feature_id_transform(min_size=0,
                                      exclude_missing=True,
                                      zero_based=True,
                                      input_feature="name",
                                      output_feature="nameId")
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2,
                                 exclude=["nameId", "name"])
    # Wrap a plain scikit-learn estimator so it plugs into the seldon
    # pipeline; RandomForestClassifier is used as-is.
    sk_classifier = RandomForestClassifier(verbose=1)
    classifier = ske.SKLearnClassifier(clf=sk_classifier,
                                       target="nameId",
                                       excluded=["name"])

    # Scores are only available after the pipeline has been fitted.
    cv = cf.Seldon_KFold(classifier, 5)

    transformers = [("tName", tNameId), ("tAuto", tAuto), ("cv", cv)]
    p = Pipeline(transformers)

    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe(events)
    df2 = p.fit_transform(df)
    pw.save_pipeline(p, models)
    logger.info("cross validation scores %s", cv.get_scores())
Example #3
    def create_prediction_microservice(self, pipeline_folder, model_name):
        """
        Create a prediction Flask microservice app

        Parameters
        ----------

        pipeline_folder : str
           location of pipeline
        model_name : str
           model name to use for this pipeline
        """
        app = Flask(__name__)

        # Unique scratch folder so concurrent apps don't collide.
        rint = random.randint(1, 999999)
        pw = sutl.Pipeline_wrapper(work_folder='/tmp/pl_' + str(rint),
                                   aws_key=self.aws_key,
                                   aws_secret=self.aws_secret)
        pipeline = pw.load_pipeline(pipeline_folder)

        app.config["seldon_pipeline_wrapper"] = pw
        app.config["seldon_pipeline"] = pipeline
        app.config["seldon_model_name"] = model_name

        app.register_blueprint(predict_blueprint)

        # other setup tasks
        return app
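Wiring this method into a running server might look as follows; the enclosing class (called MicroserviceCreator here) and its constructor are placeholders, since only the method is shown:

# Hypothetical caller -- the class name and its aws_key/aws_secret
# fields are inferred from the self.* references above.
creator = MicroserviceCreator(aws_key="...", aws_secret="...")
app = creator.create_prediction_microservice("./pipeline_folder", "mymodel")
app.run(host="0.0.0.0", port=5000)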
Example #4
def run_pipeline(events, models):

    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2,
                                 exclude=["label"])
    detector = anod.iNNEDetector()

    wrapper = aw.AnomalyWrapper(clf=detector, excluded=["label"])

    transformers = [("tAuto", tAuto), ("clf", wrapper)]
    p = Pipeline(transformers)

    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe_from_files(events)
    df2 = p.fit_transform(df)
    pw.save_pipeline(p, models)
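Scoring new events with the saved pipeline might then look like this; whether the fitted anomaly wrapper emits its scores via transform is an assumption on my part:

# Hypothetical scoring pass -- load_pipeline and
# create_dataframe_from_files are used as elsewhere in these examples;
# the transform-based scoring is a guess.
pw = sutl.Pipeline_wrapper()
p = pw.load_pipeline("./models")
df_new = pw.create_dataframe_from_files(["./new_events"])
scored = p.transform(df_new)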
Example #5
    def train(self, sample):

        tTfidf = ptfidf.Tfidf_transform(input_feature="review",
                                        output_feature="tfidf",
                                        target_feature="sentiment",
                                        min_df=10,
                                        max_df=0.7,
                                        select_features=False,
                                        topn_features=50000,
                                        stop_words="english",
                                        ngram_range=[1, 2])

        tFilter2 = bt.Include_features_transform(
            included=["tfidf", "sentiment"])

        svmTransform = bt.Svmlight_transform(output_feature="svmfeatures",
                                             excluded=["sentiment"],
                                             zero_based=False)

        classifier_xg = xg.XGBoostClassifier(target="sentiment",
                                             svmlight_feature="svmfeatures",
                                             silent=1,
                                             max_depth=5,
                                             n_estimators=200,
                                             objective='binary:logistic',
                                             scale_pos_weight=0.2)

        cv = cf.Seldon_KFold(classifier_xg,
                             metric='auc',
                             save_folds_folder="./folds")

        transformers = [("tTfidf", tTfidf), ("tFilter2", tFilter2),
                        ("svmTransform", svmTransform), ("cv", cv)]

        p = Pipeline(transformers)

        pw = sutl.Pipeline_wrapper()
        df = pw.create_dataframe_from_files([self.data_folder],
                                            df_format="csv")
        if sample < 1.0:
            logger.info("sampling dataset to size %s", sample)
            df = df.sample(frac=sample, random_state=1)

        logger.info("Data frame shape %d, %d", df.shape[0], df.shape[1])

        df2 = p.fit_transform(df)
        pw.save_pipeline(p, self.model_folder)
        logger.info("cross validation scores %s", cv.get_scores())

        return p
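A hedged usage sketch for this method; the enclosing class (called SentimentModel here) and its data_folder/model_folder attributes are inferred from the self references:

# Hypothetical caller -- class name and constructor are assumptions.
model = SentimentModel(data_folder="./reviews", model_folder="./models")
p = model.train(sample=0.1)   # quick iteration on a 10% sample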
Example #6
def run_pipeline(events, models):
    tNameId = bt.Feature_id_transform(min_size=0,
                                      exclude_missing=True,
                                      zero_based=True,
                                      input_feature="name",
                                      output_feature="nameId")
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2,
                                 exclude=["nameId", "name"])
    keras = sk.KerasClassifier(model_create=create_model,
                               target="nameId",
                               target_readable="name")
    transformers = [("tName", tNameId), ("tAuto", tAuto), ("keras", keras)]
    p = Pipeline(transformers)

    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe(events)
    p.fit(df)   # fit returns the pipeline itself, not a transformed frame
    pw.save_pipeline(p, models)
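This example references a create_model factory that is not shown. Below is a minimal sketch of what it might build; the zero-argument signature, layer sizes, and input_dim are all assumptions (some versions may pass input/output dimensions to the factory):

from keras.models import Sequential
from keras.layers import Dense

# Hypothetical create_model for the KerasClassifier above.
def create_model():
    model = Sequential()
    model.add(Dense(64, activation="relu", input_dim=100))  # input_dim assumed
    model.add(Dense(2, activation="softmax"))               # one unit per nameId class
    model.compile(loss="categorical_crossentropy", optimizer="adam")
    return model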
Example #7
def run_pipeline(events, models):

    tNameId = bt.Feature_id_transform(min_size=0,
                                      exclude_missing=True,
                                      zero_based=True,
                                      input_feature="name",
                                      output_feature="nameId")
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2,
                                 exclude=["nameId", "name"])
    xgb = xg.XGBoostClassifier(target="nameId",
                               target_readable="name",
                               excluded=["name"],
                               learning_rate=0.1,
                               silent=0)

    transformers = [("tName", tNameId), ("tAuto", tAuto), ("xgb", xgb)]
    p = Pipeline(transformers)

    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe(events)
    p.fit(df)   # fit returns the pipeline itself, not a transformed frame
    pw.save_pipeline(p, models)
Example #8
import importlib
import json
import pprint
import random

from flask import Flask, jsonify, request
from sklearn.pipeline import Pipeline

import seldon.pipeline.util as sutl

app = Flask(__name__)
app.config.from_object('server_config')

# Unique scratch folder so concurrent server instances don't collide.
rint = random.randint(1, 999999)
if 'AWS_KEY' in app.config:
    pw = sutl.Pipeline_wrapper(work_folder='/tmp/pl_' + str(rint),
                               aws_key=app.config['AWS_KEY'],
                               aws_secret=app.config['AWS_SECRET'])
else:
    pw = sutl.Pipeline_wrapper(work_folder='/tmp/pl_' + str(rint))
pipeline = pw.load_pipeline(app.config['PIPELINE'])


def extract_input():
    client = request.args.get('client')
    j = json.loads(request.args.get('json'))
    return {"client": client, "json": j}


@app.route('/predict', methods=['GET'])
def predict():
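    # The source is truncated here; the body below is a plausible
    # completion, not the original -- the dataframe construction and
    # the response shape are assumptions.
    data = extract_input()
    df = pw.create_dataframe([data["json"]])    # assumes events are dicts
    preds = pipeline.predict_proba(df)
    return jsonify({"predictions": preds.tolist()})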