Example #1
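Both examples below assume some module-level context that is not shown in the snippets themselves: the standard-library os module, a configured logger, and the project's own kfoldtest, blindset, Metrics and ConfusionMatrix helpers. A minimal sketch of the assumed standard-library part follows; the import paths of the project-specific classes are not given on this page, so they are only noted in a comment.

import os
import logging

# kfoldtest, blindset, Metrics and ConfusionMatrix are assumed to be imported
# from the surrounding project; their module paths are not shown here.
logger = logging.getLogger(__name__)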
def run_kfold(topic, no_folds, results_type, conf_matrix):
    """
    Runs a k-fold test using the credentials in ../Credentials.py
    """

    # get credentials, import + export folders
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]
    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    output_loc_results = os.path.join(
        export_folder, "{}_kfold_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_kfold_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_kfold_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        kf = kfoldtest(n_folds=no_folds, apikey=instance_creds['apikey'],
                       url=instance_creds['url'], threshold=workspace_thresh,
                       version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        kf = kfoldtest(n_folds=no_folds, username=instance_creds['username'],
                       password=instance_creds['password'], url=instance_creds['url'],
                       threshold=workspace_thresh, version=conversation_version)
    else:
        raise ValueError(
            "Credentials for the active adoption contain neither an apikey nor a username/password pair")

    # get the training dataframe from Watson and check there are enough workspaces available to run the test
    train_df = kf.intent_df_from_watson(workspace_id)
    kf.check_sufficient_workspaces()

    # if the check passed, create the folds and push them to Watson Assistant as temporary workspaces
    folds = kf.create_folds(method='kfold')
    kf.create_kfold_WA(folds)

    available_flag = False

    while not available_flag:
        logger.info("Checking workspaces...")
        available_flag = kf.check_workspaces_status()
        time.sleep(20)

    # run kfold test
    try:
        results = kf.run_kfold_test(folds)

        if (results_type == 'raw') or (results_type == 'all'):
            results.to_csv(output_loc_results)

        classification_report = kf.create_classification_report(results)

        if (results_type == 'metrics') or (results_type == 'all'):
            metrics = Metrics(workspace_thresh)
            metric_df = metrics.get_all_metrics_CV(
                results, fold_col='fold', detailed_results=False)
            metric_df.to_csv(output_loc_metrics)

        # TODO: confusion matrix
        if conf_matrix:
            from confusionmatrix import ConfusionMatrix
            cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
            cfn.create(results, fig_path=output_loc_confmat)
            logger.info("Confusion matrix saved to {}".format(
                output_loc_confmat))

    finally:
        # regardless of what happens above, delete the temporary workspaces before exiting
        kf.delete_kfold_workspaces()
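A minimal usage sketch for run_kfold, assuming Credentials.py and config.py are set up as the function expects. The topic key, fold count and flags below are illustrative values only, not part of the original example.

if __name__ == "__main__":
    # hypothetical invocation: 'billing' stands in for a topic key defined in
    # Credentials.py; run 5 folds, export raw results and metrics, and plot a
    # confusion matrix
    run_kfold(topic="billing", no_folds=5, results_type="all", conf_matrix=True)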
Example #2
def run_blindset(topic, results_type, conf_matrix, blindset_name):
    """
    Runs a blind set test using the credentials in ../Credentials.py
    """

    # get credentials, import + export folders
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]

    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    blindset_name = blindset_name or topic + "_blindset.csv"
    output_loc_results = os.path.join(
        export_folder, "{}_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        bs = blindset(apikey=instance_creds['apikey'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        bs = blindset(username=instance_creds['username'],
                      password=instance_creds['password'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)
    else:
        raise ValueError(
            "Credentials for the active adoption contain neither an apikey nor a username/password pair")

    # run test
    blindset_df = bs.import_blindset(os.path.join(data_folder, blindset_name))
    # TODO: check blindset df
    results = bs.run_blind_test(blindset_df, workspace_id)

    # exports + metrics
    if (results_type == 'raw') or (results_type == 'all'):
        cols_export = [
            col for col in results.columns.values if col != 'intent_correct'
        ]
        results[cols_export].to_csv(output_loc_results, encoding='utf-8')
        logger.info("Raw results exported to {}".format(output_loc_results))

    if (results_type == 'metrics') or (results_type == 'all'):
        met = Metrics(workspace_thresh)
        metric_df, _ = met.get_all_metrics(results, detailed_results=True)

        metric_df.to_csv(output_loc_metrics, encoding='utf-8')
        logger.info(
            "Metrics per intent exported to {}".format(output_loc_metrics))

    # confusion matrix
    if conf_matrix:
        from confusionmatrix import ConfusionMatrix
        cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
        cfn.create(results, fig_path=output_loc_confmat)
        logger.info("Confusion matrix saved to {}".format(output_loc_confmat))

    # print high-level metrics
    overall_metrics = bs.calculate_overall_metrics(results,
                                                   av_method="weighted")
    logger.info("Overall metrics for the workspace (weighted):")
    logger.info(overall_metrics)
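A matching usage sketch for run_blindset, again with illustrative values. Passing blindset_name=None makes the function fall back to "<topic>_blindset.csv" inside the configured data folder.

if __name__ == "__main__":
    # hypothetical invocation: run the blind-set test for an example topic,
    # export per-intent metrics only, and skip the confusion matrix plot
    run_blindset(topic="billing", results_type="metrics",
                 conf_matrix=False, blindset_name=None)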