Code Example #1
def deploy_and_test_model(sc,
                          clipper_conn,
                          model,
                          version,
                          link_model=False,
                          predict_fn=predict):
    deploy_pyspark_model(clipper_conn, model_name, version, "integers",
                         predict_fn, model, sc)

    time.sleep(5)

    if link_model:
        clipper_conn.link_model_to_app(app_name, model_name)
        time.sleep(5)

    test_model(clipper_conn, app_name, version)
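
In the original file, `model_name`, `app_name`, `predict`, and `test_model` are module-level names that are not shown in this snippet. As a rough illustration of what `predict_fn` is expected to look like, here is a minimal sketch (an assumption, not code from the original file): the deployer calls the function with the SparkContext/SparkSession, the deployed PySpark model, and a batch of inputs, and it should return one string per input, as in the `predict(spark, model, inputs)` functions of Code Examples #3 and #4.

def predict(sc, model, inputs):
    # Hypothetical predict_fn: `model` is assumed here to be a pyspark.mllib
    # model exposing .predict(); an ML PipelineModel would call .transform()
    # instead, as in Code Example #3. Clipper expects one string per input.
    return [str(model.predict(x)) for x in inputs]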
Code Example #2
                                  func=testmodel1)
        elif input_type == 'string':
            deploy_python_closure(clipper_conn,
                                  name=model_name,
                                  version=version,
                                  input_type=input_type,
                                  func=testmodel2)
    elif args.deploy == 'pyspark':
        if input_type == 'double':
            deploy_python_closure(clipper_conn,
                                  name=model_name,
                                  version=version,
                                  input_type=input_type,
                                  func=testmodel5)
        elif input_type == 'string':
            model_path = os.path.join(model_dir, model_name)
            # Note: the hard-coded path below overwrites the joined path above.
            model_path = "./test-model"
            model = PipelineModel.load(model_path)
            deploy_pyspark_model(clipper_conn,
                                 name=model_name,
                                 version=version,
                                 input_type=input_type,
                                 func=testmodel4,
                                 pyspark_model=model,
                                 sc=spark.sparkContext)
    #-----------------------------------------------------------------------

    if new_app:
        print('    Link model %s to app %s' % (model_name, app_name))
        clipper_conn.link_model_to_app(app_name, model_name)
Code Example #3
from io import StringIO

import pyspark.ml.feature
from clipper_admin.deployers.pyspark import deploy_pyspark_model
from pandas import read_csv


def predict(spark, model, inputs):
    # Parse the incoming CSV string into a Spark DataFrame.
    TESTDATA = StringIO(inputs[0])
    data = spark.createDataFrame(read_csv(TESTDATA, header=None, names=['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']))
    # Assemble the four numeric columns into a single feature vector.
    feature_cols = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width']
    assembler = pyspark.ml.feature.VectorAssembler(inputCols=feature_cols, outputCol='features')
    data = assembler.transform(data)
    data = data.select(['features', 'class'])
    # Index the string class label into a numeric label column.
    label_indexer = pyspark.ml.feature.StringIndexer(inputCol='class', outputCol='label').fit(data)
    data = label_indexer.transform(data)
    data = data.select(['features', 'label'])
    # Run the fitted pipeline and return one prediction per input row,
    # converted to the string outputs Clipper's deployers expect.
    output = model.transform(data).select("prediction").rdd.flatMap(lambda x: x).collect()
    return [str(x) for x in output]

deploy_pyspark_model(
    clipper_conn,
    name="iris-output",
    version=1,
    input_type="strings",  # one of Clipper's input types ("integers", "floats", "doubles", "bytes", "strings"); matches the app below
    func=predict,
    pyspark_model=model,
    sc=sc,
    pkgs_to_install=["pandas"])

clipper_conn.register_application(
    name="iris-app",
    input_type="strings",
    default_output="-1",
    slo_micros=9000000)  # return the default output if no prediction arrives within 9 seconds

clipper_conn.link_model_to_app(app_name="iris-app", model_name="iris-output")
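
The snippet above registers the application and links the model, but does not show a client request. Here is a minimal query sketch (not part of the original example; the sample CSV row is made up), following the same REST request pattern used in Code Example #4.

import json
import requests

# Send one CSV row, in the column order parsed by predict() above, to the
# "iris-app" endpoint exposed by Clipper's query frontend.
addr = clipper_conn.get_query_addr()
response = requests.post(
    "http://%s/iris-app/predict" % addr,
    headers={"Content-type": "application/json"},
    data=json.dumps({"input": "5.1,3.5,1.4,0.2,Iris-setosa"}))
print(response.json())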

Code Example #4
def run_test():
    spark = SparkSession\
        .builder\
        .appName("clipper-pyspark")\
        .getOrCreate()

    # `columns` (presumably the ["id", "text"] column names) is defined at
    # module level in the original test file.
    training = spark.createDataFrame([(0, "a b c d e spark", 1.0),
                                      (1, "b d", 0.0), (2, "spark f g h", 1.0),
                                      (3, "hadoop mapreduce", 0.0)],
                                     columns + ["label"])

    # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(),
                          outputCol="features")
    lr = LogisticRegression(maxIter=10, regParam=0.001)
    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])

    # Fit the pipeline to training documents.
    model = pipeline.fit(training)

    # Prepare test documents, which are unlabeled (id, text) tuples.
    test = spark.createDataFrame([(4, "spark i j k"), (5, "l m n"),
                                  (6, "spark hadoop spark"),
                                  (7, "apache hadoop")], columns)

    # Make predictions on test documents and print columns of interest.
    prediction = model.transform(test)
    selected = prediction.select("id", "text", "probability", "prediction")
    for row in selected.collect():
        rid, text, prob, prediction = row
        print("(%d, %s) --> prob=%s, prediction=%f" %
              (rid, text, str(prob), prediction))

    # test predict function
    print(
        predict(spark, model,
                [json.dumps((np.random.randint(1000), "spark abcd"))]))

    try:
        # Start a fresh Clipper cluster in Docker for this test run.
        clipper_conn = create_docker_connection(cleanup=True,
                                                start_clipper=True)

        try:
            # Register the test application with a 10-second latency SLO.
            clipper_conn.register_application(app_name, "strings",
                                              "default_pred", 10000000)
            time.sleep(1)

            addr = clipper_conn.get_query_addr()
            response = requests.post("http://%s/%s/predict" % (addr, app_name),
                                     headers=headers,
                                     data=json.dumps({
                                         'input':
                                         json.dumps((np.random.randint(1000),
                                                     "spark abcd"))
                                     }))
            result = response.json()
            if response.status_code != requests.codes.ok:
                print("Error: %s" % response.text)
                raise BenchmarkException("Error creating app %s" % app_name)

            # Deploy version 1 of the PySpark model and link it to the app.
            version = 1
            deploy_pyspark_model(clipper_conn, model_name, version, "strings",
                                 predict, model, spark.sparkContext)
            clipper_conn.link_model_to_app(app_name, model_name)
            time.sleep(30)
            num_preds = 25
            num_defaults = 0
            addr = clipper_conn.get_query_addr()
            for i in range(num_preds):
                response = requests.post(
                    "http://%s/%s/predict" % (addr, app_name),
                    headers=headers,
                    data=json.dumps({
                        'input':
                        json.dumps((np.random.randint(1000), "spark abcd"))
                    }))
                result = response.json()
                if response.status_code == requests.codes.ok and result[
                        "default"]:
                    num_defaults += 1
            if num_defaults > 0:
                print("Error: %d/%d predictions were default" %
                      (num_defaults, num_preds))
            if num_defaults > num_preds / 2:
                raise BenchmarkException("Error querying APP %s, MODEL %s:%d" %
                                         (app_name, model_name, version))

            # Deploy version 2 of the same model and repeat the query check.
            version += 1
            deploy_pyspark_model(clipper_conn, model_name, version, "strings",
                                 predict, model, spark.sparkContext)
            time.sleep(30)
            num_preds = 25
            num_defaults = 0
            addr = clipper_conn.get_query_addr()
            for i in range(num_preds):
                response = requests.post(
                    "http://%s/%s/predict" % (addr, app_name),
                    headers=headers,
                    data=json.dumps({
                        'input':
                        json.dumps((np.random.randint(1000), "spark abcd"))
                    }))
                result = response.json()
                if response.status_code == requests.codes.ok and result[
                        "default"]:
                    num_defaults += 1
            if num_defaults > 0:
                print("Error: %d/%d predictions were default" %
                      (num_defaults, num_preds))
            if num_defaults > num_preds / 2:
                raise BenchmarkException("Error querying APP %s, MODEL %s:%d" %
                                         (app_name, model_name, version))
        except BenchmarkException as e:
            log_docker(clipper_conn)
            log_clipper_state(clipper_conn)
            logger.exception("BenchmarkException")
            clipper_conn = create_docker_connection(cleanup=True,
                                                    start_clipper=False)
            sys.exit(1)
        else:
            spark.stop()
            clipper_conn = create_docker_connection(cleanup=True,
                                                    start_clipper=False)
            logger.info("ALL TESTS PASSED")
    except Exception as e:
        log_docker(clipper_conn)
        logger.exception("Exception")
        clipper_conn = create_docker_connection(cleanup=True,
                                                start_clipper=False)
        sys.exit(1)