Esempio n. 1
0
def show_tuner_eval(tuning_job_name):
    """Plot each training job's final objective value against its start time.

    :param tuning_job_name: name of the SageMaker hyperparameter tuning job.
    """
    tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)
    # Fetch the per-training-job analytics; the original referenced an
    # undefined `df` here, which raised NameError.
    df = tuner.dataframe()

    hover = HoverHelper(tuner)

    p = figure(plot_width=900, plot_height=400, tools=hover.tools(), x_axis_type='datetime')
    p.circle(source=df, x='TrainingStartTime', y='FinalObjectiveValue')
    show(p)
def get_tuner_status(xgb_tuner_job_name: str) -> str:
    """
    Poll a SageMaker hyperparameter tuning job until it leaves the
    'InProgress' state, reprinting a tabulated summary of the completed
    training jobs every 30 seconds.

    :param: xgb_tuner_job_name - a string containing the label of the hyperparameter tuning job to query.

    :return: status of the job (Completed or not)
    :rtype: string
    """
    sagemaker_client = boto3.Session().client('sagemaker')
    tuning_job_result = sagemaker_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=xgb_tuner_job_name)
    status = tuning_job_result['HyperParameterTuningJobStatus']

    # Columns displayed in the progress table. The hyperparameter names
    # assume an XGBoost tuning job -- TODO confirm against the job config.
    jobs_headers = [
        'TrainingJobName', 'FinalObjectiveValue', 'eta', 'gamma', 'max_depth',
        'subsample', 'colsample_bytree'
    ]
    # Keep refreshing the display while the tuning job is in progress.
    while status == 'InProgress':
        # Clear the notebook output cell to update its content:
        clear_output()
        print('Tuning job in progress (status: {})'.format(status))

        # Check if at least one job started:
        job_count = tuning_job_result['TrainingJobStatusCounters']['Completed']
        if job_count > 0:
            print("{} training jobs are complete:".format(job_count))

            # Get the details about the different jobs launched by the hyperparameter tuning process
            jobs_df = sagemaker.HyperparameterTuningJobAnalytics(
                xgb_tuner_job_name).dataframe()

            # Sorts the dataframe and keep only the fields we are interested into:
            jobs_df = jobs_df.sort_values(by='TrainingStartTime',
                                          ascending=False)[jobs_headers]
            # Drop rows that have not yet reported a valid objective value.
            jobs_df = jobs_df[jobs_df['FinalObjectiveValue'] > -float('inf')]
            print(
                tabulate.tabulate(jobs_df,
                                  tablefmt='psql',
                                  headers=jobs_headers))

        else:
            print('No job completed yet.')

        # Wait before polling the API again to avoid hammering it.
        time.sleep(30)

        # Refresh the job description so the loop condition sees new status.
        tuning_job_result = sagemaker_client.describe_hyper_parameter_tuning_job(
            HyperParameterTuningJobName=xgb_tuner_job_name)
        status = tuning_job_result['HyperParameterTuningJobStatus']

    return status
Esempio n. 3
0
def get_tuner_results(tuning_job_name):
    """Return the tuning job's per-training-job results as a DataFrame.

    Rows without a valid FinalObjectiveValue are dropped; the remainder is
    sorted by objective value. The sort direction comes from the free
    module-level variable ``is_minimize`` -- NOTE(review): confirm it is
    defined before this function is called.

    :param tuning_job_name: name of the hyperparameter tuning job.
    :return: pandas DataFrame of valid results (empty if none yet).
    """
    tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)

    full_df = tuner.dataframe()

    # Start from an empty frame so the return below cannot hit an unbound
    # local when the tuning job has produced no results yet.
    df = pd.DataFrame()
    if len(full_df) > 0:
        df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
        if len(df) > 0:
            df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
            print("Number of training jobs with valid objective: %d" % len(df))
            print({"lowest": min(df['FinalObjectiveValue']), "highest": max(df['FinalObjectiveValue'])})
            # -1 was removed in pandas 1.0; None disables truncation.
            pd.set_option('display.max_colwidth', None)  # Don't truncate TrainingJobName
        else:
            print("No training jobs have reported valid results yet.")

    return df
Esempio n. 4
0
def get_tuner_results(tuning_job_name):
    """Summarize a tuning job's progress and return its valid results.

    Prints a reminder if the job is not complete and the count of finished
    training jobs, then returns the per-training-job analytics sorted by
    FinalObjectiveValue (ascending when the job minimizes its objective,
    descending when it maximizes).

    :param tuning_job_name: name of the hyperparameter tuning job.
    :return: pandas DataFrame of jobs with a valid objective (empty if none).
    """
    sage_client = boto3.Session().client('sagemaker')

    tuning_job_result = sage_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=tuning_job_name)

    status = tuning_job_result['HyperParameterTuningJobStatus']
    if status != 'Completed':
        print('Reminder: the tuning job has not been completed.')

    job_count = tuning_job_result['TrainingJobStatusCounters']['Completed']
    print("{} training jobs have completed".format(job_count))

    # Sort ascending when minimizing so the best job appears first.
    # (Removed unused locals `region` and `objective_name` from the
    # original -- they were computed but never read.)
    is_minimize = (tuning_job_result['HyperParameterTuningJobConfig']
                   ['HyperParameterTuningJobObjective']['Type'] != 'Maximize')
    jobs_df = sagemaker.HyperparameterTuningJobAnalytics(
        tuning_job_name).dataframe()
    df = pd.DataFrame()
    if len(jobs_df) > 0:
        # Keep only rows that reported a valid (finite) objective value.
        df = jobs_df[jobs_df['FinalObjectiveValue'] > -float('inf')]
        if len(df) > 0:
            df = df.sort_values(by='FinalObjectiveValue',
                                ascending=is_minimize)
            print("Number of training jobs with valid objective: %d" % len(df))
            print({
                "lowest": min(df['FinalObjectiveValue']),
                "highest": max(df['FinalObjectiveValue'])
            })
            pd.set_option('display.max_colwidth',
                          None)  # Don't truncate TrainingJobName
        else:
            print("No training jobs have reported valid results yet.")

    return df
# Derive the optimization direction and metric name from the tuning job's
# objective configuration (relies on `tuning_job_result` defined earlier).
_objective = tuning_job_result['HyperParameterTuningJobConfig']['HyperParameterTuningJobObjective']
is_minimize = _objective['Type'] != 'Maximize'
objective_name = _objective['MetricName']

from pprint import pprint

# Show the best training job found so far, if the service reported one.
best_job = tuning_job_result.get('BestTrainingJob', None)
if best_job:
    print("Best model found so far:")
    pprint(best_job)
else:
    print("No training jobs have reported results yet.")

# Results as a DataFrame
import pandas as pd

tuner = sagemaker.HyperparameterTuningJobAnalytics(tuning_job_name)

full_df = tuner.dataframe()

if len(full_df) > 0:
    # Keep only rows that reported a valid (finite) objective value.
    df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
    if len(df) > 0:
        df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
        print("Number of training jobs with valid objective: %d" % len(df))
        print({
            "lowest": min(df['FinalObjectiveValue']),
            "highest": max(df['FinalObjectiveValue'])
        })
        # -1 was removed in pandas 1.0; None disables truncation.
        pd.set_option('display.max_colwidth',
                      None)  # Don't truncate TrainingJobName
    else:
        # Restored: this branch was truncated in the source; the message
        # mirrors the identical sibling snippets in this file.
        print("No training jobs have reported valid results yet.")
Esempio n. 6
0
from pprint import pprint
# Show the best training job found so far, if the service reported one.
if tuning_job_result.get('BestTrainingJob', None):
    print("Best model found so far:")
    pprint(tuning_job_result['BestTrainingJob'])
else:
    print("No training jobs have reported results yet.")


# ### Fetch all results as a DataFrame
# 
# You can list hyperparameters and objective metrics of all training jobs and pick up the training job with the best objective metric.

# In[54]:


tuner_analytics = sagemaker.HyperparameterTuningJobAnalytics(tuner.latest_tuning_job.job_name)

full_df = tuner_analytics.dataframe()

# Start from an empty frame so the trailing `df` expression below cannot
# raise NameError when no results exist yet.
df = pd.DataFrame()
if len(full_df) > 0:
    # Keep only rows that reported a valid (finite) objective value.
    df = full_df[full_df['FinalObjectiveValue'] > -float('inf')]
    if len(df) > 0:
        df = df.sort_values('FinalObjectiveValue', ascending=is_minimize)
        print("Number of training jobs with valid objective: %d" % len(df))
        print({"lowest": min(df['FinalObjectiveValue']), "highest": max(df['FinalObjectiveValue'])})
        # -1 was removed in pandas 1.0; None disables column truncation.
        pd.set_option('display.max_colwidth', None)  # Don't truncate TrainingJobName
    else:
        print("No training jobs have reported valid results yet.")

df