Example #1
def visualize_training(experiment_name):
    model_name = "run"

    experiment_dir = experiment_name + '_' + model_name
    training_statistics = load_training_statistics(experiment_name, model_name)

    output_dir = get_ludwig_output_dir() / 'viz' / experiment_dir
    list_of_stats = [training_statistics]

    list_of_models = [experiment_dir]
    learning_curves(
        list_of_stats,
        output_feature_name='recommended',
        model_names=list_of_models,
        output_directory=output_dir,
        file_format='png',
    )
    print(f'{output_dir=}')
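Examples #1 and #4 call two helpers that are not shown. A minimal sketch, assuming Ludwig's default results/<experiment>_<model>/training_statistics.json layout, might be:

import json
from pathlib import Path

def get_ludwig_output_dir():
    # Hypothetical helper (assumption): base directory holding the
    # <experiment>_<model> result folders written by Ludwig.
    return Path('./results')

def load_training_statistics(experiment_name, model_name):
    # Hypothetical helper (assumption): read the training_statistics.json
    # that Ludwig writes into results/<experiment>_<model>/ after training.
    stats_path = (get_ludwig_output_dir() /
                  f'{experiment_name}_{model_name}' / 'training_statistics.json')
    with open(stats_path) as f:
        return json.load(f)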
Example #2
def test_learning_curves_vis_api(experiment_to_use):
    """Ensure pdf and png figures can be saved via visualization API call.

    :param experiment_to_use: Object containing trained model and results to
        test visualization
    :return: None
    """
    experiment = experiment_to_use
    viz_outputs = ("pdf", "png")
    with TemporaryDirectory() as tmpvizdir:
        for viz_output in viz_outputs:
            vis_output_pattern = tmpvizdir + f"/*.{viz_output}"
            visualize.learning_curves([experiment.train_stats],
                                      output_feature_name=None,
                                      output_directory=tmpvizdir,
                                      file_format=viz_output)
            figures = glob.glob(vis_output_pattern)
            assert 3 == len(figures)
Example #3
def test_learning_curves_vis_api(csv_filename):
    """Ensure pdf and png figures can be saved via visualization API call.

    :param csv_filename: csv fixture from tests.fixtures.filenames.csv_filename
    :return: None
    """
    experiment = Experiment(csv_filename)
    viz_outputs = ('pdf', 'png')
    for viz_output in viz_outputs:
        vis_output_pattern = experiment.output_dir + '/*.{}'.format(
            viz_output)
        visualize.learning_curves(experiment.train_stats,
                                  output_feature_name=None,
                                  output_directory=experiment.output_dir,
                                  file_format=viz_output)
        figures = glob.glob(vis_output_pattern)
        assert 4 == len(figures)
    shutil.rmtree(experiment.output_dir, ignore_errors=True)
Example #4
def visualize_training():
    experiment_name = 'rt'
    model_name = "run"

    experiment_dir = experiment_name + '_' + model_name
    training_statistics = load_training_statistics(experiment_name, model_name)

    output_directory = SCRIPT_DIR / 'output' / 'visualizations'
    list_of_stats = [training_statistics]

    list_of_models = [experiment_dir]
    learning_curves(
        list_of_stats,
        output_feature_name='recommended',
        model_names=list_of_models,
        output_directory=output_directory,
        file_format='png',
    )
Example #5
def visualize_training_bak():
    results_path = SCRIPT_DIR / 'output' / 'results'
    results_dirs = [d for d in results_path.glob('*') if d.is_dir()]
    latest_dirs = sorted(results_dirs, key=lambda f: f.name, reverse=True)
    if len(latest_dirs) == 0:
        sys.exit('Cannot find results dir in {}'.format(results_path))

    experiment_dir = latest_dirs[0]
    print('get training statistics from {}'.format(experiment_dir))
    # training_statistics = load_training_statistics(experiment_dir)

    output_directory = (SCRIPT_DIR / 'output' / 'visualizations' /
                        experiment_dir.name)
    # list_of_stats = [training_statistics]
    list_of_stats: List[dict] = []

    list_of_models = [experiment_dir.name]
    learning_curves(
        list_of_stats,
        output_feature_name='recommended',
        model_names=list_of_models,
        output_directory=output_directory,
        file_format='png',
    )
Example #6
    # set up Python dictionary to hold model training parameters
    model_definition = base_model.copy()
    model_definition['input_features'][0]['fc_layers'] = model_option.fc_layers
    model_definition['training']['epochs'] = 8

    # Define Ludwig model object that drives model training
    model = LudwigModel(model_definition, logging_level=logging.INFO)

    # initiate model training
    train_stats = model.train(data_csv='./data/mnist_dataset_training.csv',
                              experiment_name='multiple_experiment',
                              model_name=model_option.name)

    # save training stats for later use
    list_of_train_stats.append(
        TrainingResult(name=model_option.name, train_stats=train_stats))

    print('>>>>>>> completed: ', model_option.name, '\n')

    model.close()

# generating learning curves from training
option_names = [trs.name for trs in list_of_train_stats]
train_stats = [trs.train_stats for trs in list_of_train_stats]
learning_curves(train_stats,
                'Survived',
                model_names=option_names,
                output_directory='./visualizations',
                file_format='png')
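Examples #6 and #8 append TrainingResult objects that are not defined in the snippets. A minimal stand-in, assuming only the two fields actually used, could be:

from collections import namedtuple

# Minimal container for one model variant's name and its training statistics
# (assumption: nothing beyond these two fields is needed by the plotting code).
TrainingResult = namedtuple('TrainingResult', ['name', 'train_stats'])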
Example #7
shutil.rmtree('./visualizations', ignore_errors=True)

# list models to train
list_of_model_ids = ['model1', 'model2']
list_of_train_stats = []

# ## Train models
for model_id in list_of_model_ids:
    print('>>>> training: ', model_id)

    # Define Ludwig model object that drives model training
    model = LudwigModel(config='./' + model_id + '_config.yaml',
                        logging_level=logging.WARN)

    # initiate model training
    train_stats, _, _ = model.train(dataset='./data/train.csv',
                                    experiment_name='multiple_experiment',
                                    model_name=model_id)

    # save training stats for later use
    list_of_train_stats.append(train_stats)

    print('>>>>>>> completed: ', model_id, '\n')

# generating learning curves from training
learning_curves(list_of_train_stats,
                'Survived',
                model_names=list_of_model_ids,
                output_directory='./visualizations',
                file_format='png')
Example #8
    # set up Python dictionary to hold model training parameters
    config = base_model.copy()
    config["input_features"][0]["fc_layers"] = model_option.fc_layers
    config[TRAINER]["epochs"] = 5

    # Define Ludwig model object that drives model training
    model = LudwigModel(config, logging_level=logging.INFO)

    # initiate model training
    train_stats, _, _ = model.train(
        training_set=training_set,
        test_set=test_set,
        experiment_name="multiple_experiment",
        model_name=model_option.name,
    )

    # save training stats for later use
    list_of_train_stats.append(
        TrainingResult(name=model_option.name, train_stats=train_stats))

    print(">>>>>>> completed: ", model_option.name, "\n")

# generating learning curves from training
option_names = [trs.name for trs in list_of_train_stats]
train_stats = [trs.train_stats for trs in list_of_train_stats]
learning_curves(train_stats,
                "Survived",
                model_names=option_names,
                output_directory="./visualizations",
                file_format="png")
Example #9
# list models to train
list_of_model_ids = ["model1", "model2"]
list_of_train_stats = []

training_set, _, _ = titanic.load(split=True)

# ## Train models
for model_id in list_of_model_ids:
    print(">>>> training: ", model_id)

    # Define Ludwig model object that drives model training
    model = LudwigModel(config="./" + model_id + "_config.yaml", logging_level=logging.WARN)

    # initiate model training
    train_stats, _, _ = model.train(dataset=training_set, experiment_name="multiple_experiment", model_name=model_id)

    # save training stats for later use
    list_of_train_stats.append(train_stats)

    print(">>>>>>> completed: ", model_id, "\n")

# generating learning curves from training
learning_curves(
    list_of_train_stats,
    "Survived",
    model_names=list_of_model_ids,
    output_directory="./visualizations",
    file_format="png",
)
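Examples #7 and #9 load per-model YAML configs (model1_config.yaml, model2_config.yaml) that are not shown. The equivalent content, written as a Python dict, might look roughly like this; the input features are assumptions, only the 'Survived' output comes from the calls above:

# Hypothetical minimal config for the Titanic examples above; only the
# 'Survived' output feature is taken from the snippets, the rest is assumed.
config = {
    'input_features': [
        {'name': 'Pclass', 'type': 'category'},
        {'name': 'Sex', 'type': 'category'},
    ],
    'output_features': [
        {'name': 'Survived', 'type': 'binary'},
    ],
}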
Example #10
#
# Example demonstrating visual api
#
from ludwig.visualize import learning_curves
import json

# read in training statistics
with open('./results_api/api_experiment_run/training_statistics.json') as f:
    training_stats = json.load(f)

# generating learning curves
learning_curves(training_stats,
                'label',
                output_directory='./viz_api',
                file_format='png')
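The training_statistics.json read in Example #10 is produced by a prior training run. A minimal sketch of generating it (the config and dataset paths are assumptions; 'api_experiment' and 'run' are Ludwig's default experiment and model names) could be:

import logging
from ludwig.api import LudwigModel

# Train once so Ludwig writes training_statistics.json under
# ./results_api/api_experiment_run/ (default experiment/model names).
model = LudwigModel(config='./config.yaml', logging_level=logging.INFO)
train_stats, _, _ = model.train(dataset='./data/train.csv',
                                output_directory='./results_api')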
Example #11
           "./profile_images")

    with open("./config.yaml") as f:
        config = yaml.safe_load(f.read())

    model = LudwigModel(config, logging_level=logging.INFO)

    train_stats, preprocessed_data, output_directory = model.train(
        dataset=training_set)

    # Generates predictions and performance statistics for the test set.
    test_stats, predictions, output_directory = model.evaluate(
        test_set, collect_predictions=True, collect_overall_stats=True)

    confusion_matrix(
        [test_stats],
        model.training_set_metadata,
        "account_type",
        top_n_classes=[2],
        model_names=[""],
        normalize=True,
        output_directory="./visualizations",
        file_format="png",
    )

    # Visualizes learning curves, which show how performance metrics changed over time during training.
    learning_curves(train_stats,
                    output_feature_name="account_type",
                    output_directory="./visualizations",
                    file_format="png")
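Example #11 calls confusion_matrix and learning_curves without showing its imports; the names it relies on (plus the yaml and logging modules used above) would be:

import logging
import yaml
from ludwig.api import LudwigModel
from ludwig.visualize import confusion_matrix, learning_curves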
Example #12
start_time = time.time()
model = LudwigModel(model_definition_file='./LudwigModelDefinitionFile.yml')
train_stats = model.train(data_df=breast_cancer_dataset_train,
                          skip_save_model=True,
                          skip_save_processed_input=True,
                          skip_save_training_statistics=True,
                          skip_save_training_description=True,
                          skip_save_log=True,
                          skip_save_progress=True)
training_time = time.time() - start_time

# Visualize training statistics
from ludwig.visualize import learning_curves

learning_curves(train_stats, output_feature_name='label')

# Predict and print statistics
pred = model.predict(data_df=X_test)
predictions = pred['label_predictions']
Y_test = Y_test == 1  # Change labels from 0/1 to False/True

pred_correct = []
for i in range(len(Y_test)):
    pred_correct.append(predictions.iloc[i] == Y_test.iloc[i])

print("No. of correct predictions = {}".format(sum(pred_correct)))
print("No. of incorrect predictions = {}".format(
    len(Y_test) - sum(pred_correct)))
print("Training time = {} seconds".format(round(training_time, 2)))