Example no. 1
import sys

import pandas as pd
import numpy as np
from mlflow.tracking import MlflowClient

from pyspark.sql import SparkSession

from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf
from lendingclub_scoring.pipelines.LendingClubTrainingPipeline import LendingClubTrainingPipeline
from lendingclub_scoring.pipelines.LendingClubModelEvaluationPipeline import LendingClubModelEvaluationPipeline
from lendingclub_scoring.pipelines.LendingClubConsumerPipeline import LendingClubConsumerPipeline

spark = SparkSession.builder.appName('ForecastingTest').getOrCreate()
conf = read_config('e2e_int_config.yaml', sys.argv[1])
experimentID = setupMlflowConf(conf)

limit = 100000

# train
p = LendingClubTrainingPipeline(spark,
                                conf['data-path'],
                                conf['model-name'],
                                limit=limit)
p.run()

# verify that at least one candidate run was logged to the MLflow experiment
spark_df = spark.read.format("mlflow-experiment").load(experimentID)
assert spark_df.where("tags.candidate='true'").count() > 0

# deploy
p = LendingClubModelEvaluationPipeline(spark,
                                       experimentID,
                                       conf['model-name'])  # arguments after `spark` are assumed: the listing breaks off mid-call
p.run()
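Example no. 1 imports MlflowClient but the listing breaks off before it is used; presumably the truncated remainder of the end-to-end test inspects the Model Registry after deployment. A minimal sketch of such a check, assuming the evaluation pipeline promotes the winning run to the 'Production' stage (the stage name and the check itself are assumptions):

client = MlflowClient()
# Expect at least one model version to have been promoted by the evaluation step.
prod_versions = client.get_latest_versions(conf['model-name'], stages=['Production'])
assert len(prod_versions) > 0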
Example no. 2

import sys

from pyspark.sql import SparkSession

from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf
from lendingclub_scoring.pipelines.LendingClubTrainingPipeline import LendingClubTrainingPipeline

spark = SparkSession.builder.appName('ForecastingTest').getOrCreate()
conf = read_config('train_config.yaml', sys.argv[1])
setupMlflowConf(conf)
# train the model; runs are logged to the MLflow experiment configured above
p = LendingClubTrainingPipeline(spark, conf['data-path'], conf['model-name'])
p.run()
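read_config and setupMlflowConf are not shown in these listings; each script only passes a YAML file name plus sys.argv[1] to read_config. A purely illustrative sketch of what such a helper could look like, assuming the second argument is a base directory containing the YAML files (the real implementation lives in lendingclub_scoring.config.ConfigProvider and may differ):

import os
import yaml

def read_config(name: str, base_dir: str) -> dict:
    # Hypothetical: resolve <base_dir>/<name> and parse it as YAML.
    with open(os.path.join(base_dir, name)) as f:
        return yaml.safe_load(f)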
Example no. 3
import sys

from pyspark.sql import SparkSession

from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf
from lendingclub_scoring.pipelines.LendingClubConsumerPipeline import LendingClubConsumerPipeline

spark = SparkSession.builder.appName('Test').getOrCreate()
conf = read_config('consumer_config.yaml', sys.argv[1])
setupMlflowConf(conf)

# score the input data with the registered model at the configured stage
p = LendingClubConsumerPipeline(spark, conf['data-path'], conf['output-path'],
                                conf['model-name'], conf['stage'])
p.run()

# inspect the scored output
spark.read.load(conf['output-path']).show(1000, False)
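In an automated run the final show() can be replaced with an assertion in the style of Example no. 1, failing the job if the consumer pipeline wrote no rows (a sketch; the exact check is an assumption):

out_df = spark.read.load(conf['output-path'])
# Fail fast if scoring produced an empty output dataset.
assert out_df.count() > 0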