Example #1
0
import sys

import pandas as pd
import numpy as np

from pyspark.sql import SparkSession

from lab_project_demo.config.ConfigProvider import read_config, setupMlflowConf
from lab_project_demo.models.ModelEvaluationPipeline import ModelEvaluationPipeline

# Driver script: configure Spark and MLflow, then run the model evaluation
# pipeline.

# getOrCreate() hands back an already-active session when one exists.
session_builder = SparkSession.builder.appName('ForecastingTest')
spark = session_builder.getOrCreate()

# The first command-line argument is forwarded to read_config together with
# the config file name.
conf = read_config('train_config.yaml', sys.argv[1])

# The value returned by setupMlflowConf is handed to the pipeline below as
# the experiment identifier.
experimentID = setupMlflowConf(conf)

model_name = conf['model-name']
data_path = conf['data-path']

p = ModelEvaluationPipeline(spark, experimentID, model_name, data_path)
p.run()
Example #2
0
import sys

import pandas as pd
import numpy as np
from mlflow.tracking import MlflowClient

from pyspark.sql import SparkSession

from lab_project_demo.config.ConfigProvider import read_config, setupMlflowConf
from lab_project_demo.models.TrainingPipeline import TrainingPipeline
from lab_project_demo.models.ModelEvaluationPipeline import ModelEvaluationPipeline
from lab_project_demo.models.ConsumerPipeline import ConsumerPipeline

# End-to-end integration script: configure Spark and MLflow, train a model,
# then check that the experiment contains at least one candidate run.

# getOrCreate() hands back an already-active session when one exists.
session_builder = SparkSession.builder.appName('ForecastingTest')
spark = session_builder.getOrCreate()

# The first command-line argument is forwarded to read_config together with
# the config file name.
conf = read_config('e2e_int_config.yaml', sys.argv[1])
experimentID = setupMlflowConf(conf)

# Forwarded to TrainingPipeline as its `limit` keyword argument.
limit = 100

# train
data_path = conf['data-path']
model_name = conf['model-name']
p = TrainingPipeline(spark, data_path, model_name, limit=limit)
p.run()

# Read the experiment back through the "mlflow-experiment" data source and
# require at least one row tagged as a candidate.
experiment_reader = spark.read.format("mlflow-experiment")
spark_df = experiment_reader.load(experimentID)
candidate_filter = "tags.candidate='true'"
assert spark_df.where(candidate_filter).count() > 0

# deploy
p = ModelEvaluationPipeline(spark,
                            experimentID,
                            conf['model-name'],
                            conf['data-path'],
import sys

from pyspark.sql import SparkSession
import os
from lab_project_demo.models.ConsumerPipeline import ConsumerPipeline
from lab_project_demo.config.ConfigProvider import read_config, setupMlflowConf

# Driver script: configure Spark and MLflow, run the consumer pipeline, then
# print what it wrote to the output path.

# getOrCreate() hands back an already-active session when one exists.
session_builder = SparkSession.builder.appName('Test')
spark = session_builder.getOrCreate()

# The first command-line argument is forwarded to read_config together with
# the config file name.
conf = read_config('consumer_config.yaml', sys.argv[1])

# Return value is intentionally unused in this script.
setupMlflowConf(conf)

data_path = conf['data-path']
output_path = conf['output-path']
model_name = conf['model-name']
stage = conf['stage']

p = ConsumerPipeline(spark, data_path, output_path, model_name, stage)
p.run()

# Show up to 1000 rows with column truncation disabled; the output path is
# looked up from the config again after the run.
result_df = spark.read.load(conf['output-path'])
result_df.show(1000, False)