def deploy_and_test_model(sc,
                          clipper_conn,
                          model,
                          version,
                          link_model=False,
                          predict_fn=predict):
    deploy_pyspark_model(clipper_conn, model_name, version, "integers",
                         predict_fn, model, sc)
    time.sleep(5)

    if link_model:
        clipper_conn.link_model_to_app(app_name, model_name)
        time.sleep(5)

    test_model(clipper_conn, app_name, version)
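# A minimal usage sketch (assumed, not from the original file): deploy two
# versions of a fitted model, linking the app only on the first deploy.
# `sc`, `clipper_conn`, and `model` must already be in scope, and
# `app_name`/`model_name` are the module-level globals the function reads.
#
#     deploy_and_test_model(sc, clipper_conn, model, version=1, link_model=True)
#     deploy_and_test_model(sc, clipper_conn, model, version=2)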
            func=testmodel1)
    elif input_type == 'string':
        deploy_python_closure(
            clipper_conn,
            name=model_name,
            version=version,
            input_type=input_type,
            func=testmodel2)
elif args.deploy == 'pyspark':
    if input_type == 'double':
        deploy_python_closure(
            clipper_conn,
            name=model_name,
            version=version,
            input_type=input_type,
            func=testmodel5)
    elif input_type == 'string':
        model_path = os.path.join(model_dir, model_name)
        # model_path = "./test-model"  # hardcoded override; disabled so the
        # path built from model_dir above is actually used
        model = PipelineModel.load(model_path)
        deploy_pyspark_model(
            clipper_conn,
            name=model_name,
            version=version,
            input_type=input_type,
            func=testmodel4,
            pyspark_model=model,
            sc=spark.sparkContext)

#-----------------------------------------------------------------------
if new_app:
    print('Link model %s to app %s' % (model_name, app_name))
    clipper_conn.link_model_to_app(app_name, model_name)
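# A hedged sketch of the argument parsing the dispatch above implies. The flag
# names and choices are assumptions inferred from `args.deploy` and
# `input_type`, not confirmed by the original file:
#
#     import argparse
#     parser = argparse.ArgumentParser()
#     parser.add_argument('--deploy', choices=['python', 'pyspark'])
#     parser.add_argument('--input_type', choices=['double', 'string'])
#     args = parser.parse_args()
#     input_type = args.input_type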
from io import StringIO

import pyspark.ml.feature
from pandas import read_csv


def predict(spark, model, inputs):
    # Each input is a CSV string of iris measurements; parse it with pandas
    # and rebuild the feature/label columns the fitted pipeline expects.
    TESTDATA = StringIO(inputs[0])
    data = spark.createDataFrame(
        read_csv(
            TESTDATA,
            header=None,
            names=[
                'sepal-length', 'sepal-width', 'petal-length', 'petal-width',
                'class'
            ]))
    feature_cols = [
        'sepal-length', 'sepal-width', 'petal-length', 'petal-width'
    ]
    assembler = pyspark.ml.feature.VectorAssembler(
        inputCols=feature_cols, outputCol='features')
    data = assembler.transform(data)
    data = data.select(['features', 'class'])
    label_indexer = pyspark.ml.feature.StringIndexer(
        inputCol='class', outputCol='label').fit(data)
    data = label_indexer.transform(data)
    data = data.select(['features', 'label'])
    output = model.transform(data).select("prediction").rdd.flatMap(
        lambda x: x).collect()
    return output


# Clipper's input types are plural ("strings", "integers", ...), so the model
# and the application must both be registered with "strings".
deploy_pyspark_model(
    clipper_conn,
    name="iris-output",
    version=1,
    input_type="strings",
    func=predict,
    pyspark_model=model,
    sc=sc,
    pkgs_to_install=["pandas"])

clipper_conn.register_application(
    name="iris-app",
    input_type="strings",
    default_output="-1",
    slo_micros=9000000)  # falls back to the default output after 9 seconds

clipper_conn.link_model_to_app(app_name="iris-app", model_name="iris-output")
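# A minimal query sketch (not part of the original example): once the model is
# linked, the app can be queried through Clipper's REST frontend. The sample
# CSV row is illustrative; the request shape mirrors the calls in run_test()
# below.
#
#     import json, requests
#     addr = clipper_conn.get_query_addr()
#     response = requests.post(
#         "http://%s/iris-app/predict" % addr,
#         headers={"Content-type": "application/json"},
#         data=json.dumps({'input': "5.1,3.5,1.4,0.2,Iris-setosa"}))
#     print(response.json())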
def run_test():
    spark = SparkSession\
        .builder\
        .appName("clipper-pyspark")\
        .getOrCreate()

    training = spark.createDataFrame(
        [(0, "a b c d e spark", 1.0), (1, "b d", 0.0),
         (2, "spark f g h", 1.0), (3, "hadoop mapreduce", 0.0)],
        columns + ["label"])

    # Configure an ML pipeline, which consists of three stages: tokenizer,
    # hashingTF, and lr.
    tokenizer = Tokenizer(inputCol="text", outputCol="words")
    hashingTF = HashingTF(
        inputCol=tokenizer.getOutputCol(), outputCol="features")
    lr = LogisticRegression(maxIter=10, regParam=0.001)
    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])

    # Fit the pipeline to training documents.
    model = pipeline.fit(training)

    # Prepare test documents, which are unlabeled (id, text) tuples.
    test = spark.createDataFrame([(4, "spark i j k"), (5, "l m n"),
                                  (6, "spark hadoop spark"),
                                  (7, "apache hadoop")], columns)

    # Make predictions on test documents and print columns of interest.
    prediction = model.transform(test)
    selected = prediction.select("id", "text", "probability", "prediction")
    for row in selected.collect():
        rid, text, prob, prediction = row
        print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob),
                                                       prediction))

    # test predict function
    print(
        predict(spark, model,
                [json.dumps((np.random.randint(1000), "spark abcd"))]))

    try:
        clipper_conn = create_docker_connection(
            cleanup=True, start_clipper=True)

        try:
            clipper_conn.register_application(app_name, "strings",
                                              "default_pred", 10000000)
            time.sleep(1)

            addr = clipper_conn.get_query_addr()
            response = requests.post(
                "http://%s/%s/predict" % (addr, app_name),
                headers=headers,
                data=json.dumps({
                    'input':
                    json.dumps((np.random.randint(1000), "spark abcd"))
                }))
            result = response.json()
            if response.status_code != requests.codes.ok:
                print("Error: %s" % response.text)
                raise BenchmarkException("Error creating app %s" % app_name)

            version = 1
            deploy_pyspark_model(clipper_conn, model_name, version, "strings",
                                 predict, model, spark.sparkContext)
            clipper_conn.link_model_to_app(app_name, model_name)
            time.sleep(30)

            num_preds = 25
            num_defaults = 0
            addr = clipper_conn.get_query_addr()
            for i in range(num_preds):
                response = requests.post(
                    "http://%s/%s/predict" % (addr, app_name),
                    headers=headers,
                    data=json.dumps({
                        'input':
                        json.dumps((np.random.randint(1000), "spark abcd"))
                    }))
                result = response.json()
                if response.status_code == requests.codes.ok and result[
                        "default"]:
                    num_defaults += 1
            if num_defaults > 0:
                print("Error: %d/%d predictions were default" %
                      (num_defaults, num_preds))
            if num_defaults > num_preds / 2:
                raise BenchmarkException(
                    "Error querying APP %s, MODEL %s:%d" %
                    (app_name, model_name, version))

            version += 1
            deploy_pyspark_model(clipper_conn, model_name, version, "strings",
                                 predict, model, spark.sparkContext)
            time.sleep(30)

            num_preds = 25
            num_defaults = 0
            addr = clipper_conn.get_query_addr()
            for i in range(num_preds):
                response = requests.post(
                    "http://%s/%s/predict" % (addr, app_name),
                    headers=headers,
                    data=json.dumps({
                        'input':
                        json.dumps((np.random.randint(1000), "spark abcd"))
                    }))
                result = response.json()
                if response.status_code == requests.codes.ok and result[
                        "default"]:
                    num_defaults += 1
            if num_defaults > 0:
                print("Error: %d/%d predictions were default" %
                      (num_defaults, num_preds))
            if num_defaults > num_preds / 2:
                raise BenchmarkException(
                    "Error querying APP %s, MODEL %s:%d" %
                    (app_name, model_name, version))
        except BenchmarkException as e:
            log_docker(clipper_conn)
            log_clipper_state(clipper_conn)
            logger.exception("BenchmarkException")
            clipper_conn = create_docker_connection(
                cleanup=True, start_clipper=False)
            sys.exit(1)
        else:
            spark.stop()
            clipper_conn = create_docker_connection(
                cleanup=True, start_clipper=False)
            logger.info("ALL TESTS PASSED")
    except Exception as e:
        log_docker(clipper_conn)
        logger.exception("Exception")
        clipper_conn = create_docker_connection(
            cleanup=True, start_clipper=False)
        sys.exit(1)
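# Conventional script entry point (assumed; the fragment above does not show
# one) so the integration test can be invoked directly:
if __name__ == "__main__":
    run_test()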