# Driver script: loads 'train_config.yaml', configures MLflow from it, and
# runs ModelEvaluationPipeline with the resulting experiment ID.
import sys

import pandas as pd
import numpy as np
from pyspark.sql import SparkSession

from lab_project_demo.config.ConfigProvider import read_config, setupMlflowConf
from lab_project_demo.models.ModelEvaluationPipeline import ModelEvaluationPipeline

# getOrCreate() hands back an already-active session when there is one.
session_builder = SparkSession.builder.appName('ForecastingTest')
spark = session_builder.getOrCreate()

# The first command-line argument is forwarded to read_config as its second
# parameter (presumably an environment/profile selector -- TODO confirm).
cli_arg = sys.argv[1]
conf = read_config('train_config.yaml', cli_arg)

# setupMlflowConf returns the experiment ID the pipeline is given below.
experimentID = setupMlflowConf(conf)

model_name = conf['model-name']
data_path = conf['data-path']

p = ModelEvaluationPipeline(spark, experimentID, model_name, data_path)
p.run()
import sys import pandas as pd import numpy as np from mlflow.tracking import MlflowClient from pyspark.sql import SparkSession from lab_project_demo.config.ConfigProvider import read_config, setupMlflowConf from lab_project_demo.models.TrainingPipeline import TrainingPipeline from lab_project_demo.models.ModelEvaluationPipeline import ModelEvaluationPipeline from lab_project_demo.models.ConsumerPipeline import ConsumerPipeline spark = SparkSession.builder.appName('ForecastingTest').getOrCreate() conf = read_config('e2e_int_config.yaml', sys.argv[1]) experimentID = setupMlflowConf(conf) limit = 100 # train p = TrainingPipeline(spark, conf['data-path'], conf['model-name'], limit=limit) p.run() spark_df = spark.read.format("mlflow-experiment").load(experimentID) assert spark_df.where("tags.candidate='true'").count() > 0 # deploy p = ModelEvaluationPipeline(spark, experimentID, conf['model-name'], conf['data-path'],
# Driver script: loads 'consumer_config.yaml', configures MLflow from it, runs
# ConsumerPipeline, then reads the configured output path back and prints it.
import sys
from pyspark.sql import SparkSession
import os

from lab_project_demo.models.ConsumerPipeline import ConsumerPipeline
from lab_project_demo.config.ConfigProvider import read_config, setupMlflowConf

# getOrCreate() hands back an already-active session when there is one.
session_builder = SparkSession.builder.appName('Test')
spark = session_builder.getOrCreate()

# The first command-line argument is forwarded to read_config as its second
# parameter (presumably an environment/profile selector -- TODO confirm).
cli_arg = sys.argv[1]
conf = read_config('consumer_config.yaml', cli_arg)

# The return value of setupMlflowConf is not used by this script.
setupMlflowConf(conf)

data_path = conf['data-path']
output_path = conf['output-path']
model_name = conf['model-name']
stage = conf['stage']

p = ConsumerPipeline(spark, data_path, output_path, model_name, stage)
p.run()

# Re-read the output location from conf after the run, then display up to
# 1000 rows with column truncation disabled.
written = spark.read.load(conf['output-path'])
written.show(1000, False)