Example #1
    def create_folds_mc(self, sample_size):
        """
        Create folds for a Monte-Carlo test. Each fold is a new and independent random sample, stratified 
        with respect to intent size.
        :param: proportion of training set held out as test set.
        """

        folds = []
        # number of utterances to hold out per intent, proportional to intent size
        samples_per_intent = np.ceil(
            self.training_df.groupby('intent').count() * sample_size)
        intents = self.training_df['intent'].unique().tolist()

        for i in range(self.n_folds):
            train_inds = []
            test_inds = []

            for intent in intents:
                questions_intent = self.training_df[self.training_df['intent'] == intent]
                n_to_sample = int(samples_per_intent['utterance'][intent])
                # hold out a random sample of this intent's utterances as the test set
                test_inds_intent = questions_intent.sample(
                    n=n_to_sample).index.tolist()
                test_inds += test_inds_intent
                train_inds += list(set(questions_intent.index.tolist())
                                   - set(test_inds_intent))

            fold = {"train": train_inds,
                    "test": test_inds}
            folds.append(fold)
            logger.debug("fold num {}: train set: {}, test set: {}".format(
                i+1, len(fold["train"]), len(fold["test"])))

        return folds
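
The heart of the method is a per-intent random holdout. A self-contained sketch of the same stratified sampling on a toy DataFrame (all names here are illustrative, not taken from the source class):

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "utterance": [f"question {i}" for i in range(10)],
    "intent": ["billing"] * 6 + ["support"] * 4,
})
sample_size = 0.2  # fraction of each intent held out as test data

test_inds = []
for intent, group in df.groupby("intent"):
    n_test = int(np.ceil(len(group) * sample_size))  # ceil keeps at least one utterance per intent
    test_inds += group.sample(n=n_test, random_state=1).index.tolist()
train_inds = sorted(set(df.index) - set(test_inds))
print(len(train_inds), len(test_inds))  # 7 3: two 'billing' and one 'support' held out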
Example #2
def check_skills_exist(skill_list):
    """
    Check skills are in both the Credentials file and that blindsets exist for them.
    """

    bs_notexist = []
    cr_notexist = []

    topics_in_creds = Credentials.workspace_id[active_adoption].keys()
    get_bs_path = lambda s: os.path.join(config.data_dir, f"{s}_blindset.csv")

    for skill in skill_list:
        # append skill to bs_notexist if blindset doesn't exist
        bs_path = get_bs_path(skill)
        if not os.path.exists(bs_path):
            bs_notexist.append(skill)

        #  append skill to cr_notexist if credential doesn't exist
        if skill not in topics_in_creds:
            cr_notexist.append(skill)

    if len(bs_notexist) > 0 and len(cr_notexist) > 0:
        raise ValueError(
            "Blindsets don't exist for topics {}, and Credentials don't exist for topics {}. Please add these and try again"
            .format(bs_notexist, cr_notexist))
    elif len(bs_notexist) > 0:
        raise ValueError(
            "Blindsets don't exist for topics {}. Please add these and try again"
            .format(bs_notexist))
    elif len(cr_notexist) > 0:
        raise ValueError(
            "Credentials don't exist for topics {}. Please add these and try again"
            .format(cr_notexist))
    else:
        logger.debug("All skills exist in credentials and blindsets")
Example #3
    def create_folds_kfold(self):
        """
        Create the folds for the k-fold test. It is using the Stratified K-fold division. 

        :param self.training_df: the dataframe containing the whole GT of the workspace 
        :return folds: a list of folds containing for each fold the train and test set indexes. 
        """

        folds = []
        skf = StratifiedKFold(n_splits=self.n_folds,
                              shuffle=True, random_state=2)
        for i, (train_index, test_index) in enumerate(skf.split(
                self.training_df['utterance'], self.training_df['intent'])):
            fold = {"train": train_index,
                    "test": test_index}
            folds.append(fold)
            logger.debug("fold num {}: train set: {}, test set: {}".format(
                i+1, len(fold["train"]), len(fold["test"])))

        return folds
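
A self-contained sketch of the underlying scikit-learn call on toy data (column names mirror the snippet; everything else is illustrative):

import pandas as pd
from sklearn.model_selection import StratifiedKFold

df = pd.DataFrame({
    "utterance": [f"q{i}" for i in range(12)],
    "intent": ["billing", "support", "sales"] * 4,
})
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=2)
for i, (train_index, test_index) in enumerate(skf.split(df["utterance"], df["intent"])):
    # each test fold roughly preserves the 1:1:1 intent ratio of the full set
    print(f"fold {i+1}:", sorted(df.loc[test_index, "intent"].tolist()))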
Example #4
    def check_workspaces_status(self):
        """
        check the status of the workspace just created - You can start the k-fold only when 
        the workspaces are `Available` and not in Training mode. 

        Returns available flag to be used inside a while loop
        """

        available_count = 0

        for i, workspace_id in enumerate(self.workspaces):
            response = self.assistant.get_workspace(
                workspace_id=workspace_id).get_result()
            status = response['status']
            # The status can also be: unavailable, training, non-existent, failed
            if status == 'Available':
                available_count += 1
                logger.debug("Workspace {} available".format(i+1))

        return available_count == len(self.workspaces)
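
A typical polling pattern built on this flag (a sketch; kf stands for an instance of the surrounding class, which is an assumption here, and mirrors the loop in Example #7):

import time

# assumes the fold workspaces have already been created on kf
while not kf.check_workspaces_status():
    time.sleep(20)  # WA trains asynchronously; poll every 20 seconds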
Example #5
    def __init__(self, **kwargs):

        if 'url' in kwargs:
            self.url = kwargs['url']
        else:
            raise ValueError("URL needs to be provided.")

        if 'threshold' in kwargs:
            self.threshold = kwargs['threshold']
        else:
            # False acts as a sentinel for "not set"; a threshold must then be
            # supplied when running the blindset test
            self.threshold = False
            logger.debug(
                'No threshold provided. Provide one when running the blindset test.'
            )

        # make sure some form of authentication has been supplied
        if ('apikey' not in kwargs) and (('username' not in kwargs) or
                                         ('password' not in kwargs)):
            raise ValueError(
                "One of username & password, or apikey must be present.")

        if 'apikey' in kwargs:
            self.apikey = kwargs['apikey']
            self.auth_type = 'apikey'

        if ('username' in kwargs) and ('password' in kwargs):
            self.username = kwargs['username']
            self.password = kwargs['password']
            self.auth_type = 'password'

        if 'version' in kwargs:
            self.conversation_version = kwargs['version']
        else:
            self.conversation_version = '2018-07-10'

        # authenticate
        self.authenticate_watson()
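
Either authentication style satisfies this constructor. A usage sketch, assuming this is the blindset class instantiated in Example #6 (the module path, URL and credential values are placeholders):

from conversation_test import blindset  # module path is an assumption based on Example #6

bs = blindset.blindset(
    url="https://gateway.watsonplatform.net/assistant/api",  # placeholder
    apikey="<apikey>",
    threshold=0.4,         # optional here, but required later for the blindset test
    version="2018-07-10",  # optional; this is also the default
)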
Example #6
def main(topic_list, conf_matrix, save_master_data):
    skill_list = process_list_argument(topic_list, val_type=str)
    master_skill_id = None  # so the except block can tell whether the master skill was created
    master_thresh = Credentials.calculate_workspace_thresh("master")

    try:
        id_dict = {
            skill: Credentials.workspace_id[active_adoption][skill]
            for skill in skill_list
        }
        timestr = generate_timestamp()  #  for use in all filenames

        # authenticate
        if "apikey" in instance_creds:
            logger.debug("Authenticating (apikey)")
            bs = blindset.blindset(
                apikey=instance_creds["apikey"],
                url=instance_creds["url"],
                version=conversation_version,
            )
        elif "password" in instance_creds:
            logger.debug("Authenticating (username/password)")
            bs = blindset.blindset(
                username=instance_creds["username"],
                password=instance_creds["password"],
                url=instance_creds["url"],
                version=conversation_version,
            )

        # check skills exist
        check_skills_exist(skill_list)

        #  import blindsets and generate master
        logger.info("Importing all blindsets and combining into master")
        blind_dict = dict()
        for skill in skill_list:
            bs_path = os.path.join(config.data_dir, f"{skill}_blindset.csv")
            blind_dict[skill] = bs.import_blindset(bs_path)

        master_blind_allcols = pd.concat(
            [v.assign(topic=k) for k, v in blind_dict.items()],
            axis=0,
            ignore_index=True,
            sort=False,
        )
        master_blind = master_blind_allcols[[
            "utterance", "topic"
        ]].rename(columns={"topic": "expected intent"})

        # generate master from topic training and push to WA
        logger.info("Getting training data from WA")
        train_dict = dict()
        for skill in skill_list:
            train_dict[skill] = wa_utils.get_training_data(
                bs.assistant, id_dict[skill])

        logger.info("Creating temporary master skill")
        master_train = pd.concat(
            [
                v.drop(columns=["intent"]).assign(intent=k)
                for k, v in train_dict.items()
            ],
            axis=0,
            ignore_index=True,
            sort=False,
        )
        master_skill_id = wa_utils.create_workspace_from_df(
            bs.assistant,
            name="master",
            train_df=master_train,
            description="generated by intent_training_tools",
        )

        # run blindset on master
        logger.info("Running blindset on master..")
        results_master = bs.run_blind_test(master_blind,
                                           master_skill_id,
                                           threshold=master_thresh)
        results_master["routing"] = results_master["intent1"]
        results_master.loc[results_master["confidence1"] < master_thresh,
                           "routing"] = "anything_else"

        # create blindsets for topics based on master results
        newblind_dict = dict()
        for skill in skill_list:
            # blindset for each skill is made up of utterances that have landed in that skill for master
            blind_utterances = results_master.loc[
                (results_master["intent1"] == skill)
                & (results_master["confidence1"] >= master_thresh),
                "original_text"].tolist()
            newblind = master_blind_allcols[
                master_blind_allcols["utterance"].isin(blind_utterances)].copy()
            newblind.loc[newblind["topic"] != skill,
                         "expected intent"] = "anything_else"
            newblind_dict[skill] = newblind[
                ["utterance", "expected intent"]].reset_index(drop=True)

        # run blindsets on topics
        logger.info("Running blindset on topic skills..")
        results_dict = dict()
        for skill in skill_list:
            results_dict[skill] = bs.run_blind_test(
                newblind_dict[skill],
                id_dict[skill],
                threshold=Credentials.calculate_workspace_thresh(skill),
            )

        #  plot confusion matrices
        if conf_matrix:
            from conversation_test.confusionmatrix import ConfusionMatrix

            conf_output_path = lambda s: os.path.join(
                config.output_folder, f"{s}_multi_confmat_{timestr}.png")

            # master
            cfn = ConfusionMatrix(workspace_thresh=master_thresh)
            cfn.create(results_master, fig_path=conf_output_path("master"))

            #  topics
            for skill in skill_list:
                cfn = ConfusionMatrix(
                    workspace_thresh=Credentials.calculate_workspace_thresh(skill))
                cfn.create(results_dict[skill],
                           fig_path=conf_output_path(skill))

            logger.info("Confusion matrix saved to results folder")

        # calculate metrics
        # master
        met = Metrics(workspace_thresh=master_thresh)
        metrics_master, _ = met.get_all_metrics(results_master,
                                                detailed_results=True)

        # topics
        metrics_dict = dict()
        res_with_conf_dict = dict()
        for skill in skill_list:
            met = Metrics(
                workspace_thresh=Credentials.calculate_workspace_thresh(skill))
            metrics_dict[skill], res_with_conf_dict[skill] = met.get_all_metrics(
                results_dict[skill], detailed_results=True)

        # topics - create overall view as if it's a single skill
        topics_res_with_conf = pd.concat(
            list(res_with_conf_dict.values()),
            ignore_index=True,
            sort=False)

        results_master.loc[results_master["routing"] == "anything_else",
                           'confusion'] = 'FN'

        topics_res_with_conf = topics_res_with_conf.append(
            results_master,
            ignore_index=True,
            sort=False,
        )
        metrics_overall = met.calculate_metrics_per_intent(
            topics_res_with_conf, detailed_results=True)

        metrics_overall.loc[metrics_overall.index.isin(skill_list),
                            'threshold'] = master_thresh
        metrics_overall = metrics_overall.rename(
            index={s: s + ' - anything else'
                   for s in skill_list})

        # export results
        for skill in skill_list:
            results_dict[skill].to_csv(
                os.path.join(config.output_folder,
                             f"{skill}_multi_results_{timestr}.csv"),
                index=False,
            )
            metrics_dict[skill].to_csv(
                os.path.join(config.output_folder,
                             f"{skill}_multi_metrics_{timestr}.csv"))

        results_master.to_csv(
            os.path.join(config.output_folder,
                         f"master_multi_results_{timestr}.csv"),
            index=False,
        )
        metrics_master.to_csv(
            os.path.join(config.output_folder,
                         f"master_multi_metrics_{timestr}.csv"))
        metrics_overall.to_csv(
            os.path.join(config.output_folder,
                         f"overall_multi_metrics_{timestr}.csv"))
        logger.info("Results and metrics saved to output folder")

        if save_master_data:
            # export master blindset with both intent and topic labels to CSV
            master_blind_allcols.to_csv(
                os.path.join(config.data_dir,
                             f"master_blindset_{timestr}.csv"),
                index=False,
            )

            # export master training to CSV
            master_train.to_csv(
                os.path.join(config.data_dir,
                             f"master_training_{timestr}.csv"),
                header=False,
                index=False,
            )

            logger.info(
                "Master blindset and training have also been saved to the data folder"
            )

        #  delete master skill
        logger.info("Deleting temporary master skill")
        wa_utils.delete_workspace(bs.assistant, master_skill_id)

    except Exception:
        if master_skill_id is not None:
            # make sure the master skill is deleted even on failure
            logger.info("Deleting temporary master skill before exit")
            wa_utils.delete_workspace(bs.assistant, master_skill_id)

        # re-raise with the original traceback
        raise
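
A sketch of how this entry point might be invoked (the comma-separated topic string matches what process_list_argument above expects; the concrete values are illustrative):

main(topic_list="billing,support,sales", conf_matrix=True, save_master_data=False)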
Example #7
def run_kfold(topic, no_folds, results_type, conf_matrix):
    """
    Runs kfold test using credentials in ../Credentials.py
    """

    # get credentials, import + export folders
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]
    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    output_loc_results = os.path.join(
        export_folder, "{}_kfold_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_kfold_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_kfold_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        kf = kfoldtest(n_folds=no_folds, apikey=instance_creds['apikey'],
                       url=instance_creds['url'], threshold=workspace_thresh, version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        kf = kfoldtest(n_folds=no_folds, username=instance_creds['username'],
                       password=instance_creds['password'], url=instance_creds['url'],
                       threshold=workspace_thresh, version=conversation_version)
    else:
        # fail fast rather than hitting a NameError on `kf` below
        raise ValueError("Credentials contain neither apikey nor username/password.")

    # get train df from watson + check there are sufficient workspaces to run the test
    train_df = kf.intent_df_from_watson(workspace_id)
    kf.check_sufficient_workspaces()

    # create folds in WA if above is true
    folds = kf.create_folds(method='kfold')
    kf.create_kfold_WA(folds)

    # poll until all fold workspaces have finished training
    available_flag = False

    while not available_flag:
        logger.info("Checking workspaces..")
        available_flag = kf.check_workspaces_status()
        if not available_flag:
            time.sleep(20)

    # run kfold test
    try:
        results = kf.run_kfold_test(folds)

        if results_type in ('raw', 'all'):
            results.to_csv(output_loc_results)

        classification_report = kf.create_classification_report(results)

        if results_type in ('metrics', 'all'):
            metrics = Metrics(workspace_thresh)
            metric_df = metrics.get_all_metrics_CV(
                results, fold_col='fold', detailed_results=False)
            metric_df.to_csv(output_loc_metrics)

        if conf_matrix:
            from confusionmatrix import ConfusionMatrix
            cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
            cfn.create(results, fig_path=output_loc_confmat)
            logger.info("Confusion matrix saved to {}".format(
                output_loc_confmat))

    finally:
        # regardless of what happens above, delete the temporary workspaces before exiting
        kf.delete_kfold_workspaces()
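
A usage sketch (values are illustrative): run a 5-fold cross-validation test on the 'billing' workspace, saving both raw results and metrics plus a confusion matrix.

run_kfold(topic="billing", no_folds=5, results_type="all", conf_matrix=True)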
Example #8
def run_blindset(topic, results_type, conf_matrix, blindset_name):
    """
    Runs blindset test using credentials in ../Credentials.py
    """

    # get credentials, import + export folders
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]

    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    blindset_name = blindset_name or topic + "_blindset.csv"
    output_loc_results = os.path.join(
        export_folder, "{}_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        bs = blindset(apikey=instance_creds['apikey'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        bs = blindset(username=instance_creds['username'],
                      password=instance_creds['password'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)
    else:
        # fail fast rather than hitting a NameError on `bs` below
        raise ValueError("Credentials contain neither apikey nor username/password.")

    # run test
    blindset_df = bs.import_blindset(os.path.join(data_folder, blindset_name))
    # TODO: check blindset df
    results = bs.run_blind_test(blindset_df, workspace_id)

    # exports + metrics
    if results_type in ('raw', 'all'):
        cols_export = [
            col for col in results.columns.values if col != 'intent_correct'
        ]
        results[cols_export].to_csv(output_loc_results, encoding='utf-8')
        logger.info("Raw results exported to {}".format(output_loc_results))

    if results_type in ('metrics', 'all'):
        met = Metrics(workspace_thresh)
        metric_df, _ = met.get_all_metrics(results, detailed_results=True)

        metric_df.to_csv(output_loc_metrics, encoding='utf-8')
        logger.info(
            "Metrics per intent exported to {}".format(output_loc_metrics))

    # confusion matrix
    if conf_matrix:
        from confusionmatrix import ConfusionMatrix
        cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
        cfn.create(results, fig_path=output_loc_confmat)
        logger.info("Confusion matrix saved to {}".format(output_loc_confmat))

    # print high-level metrics
    overall_metrics = bs.calculate_overall_metrics(results,
                                                   av_method="weighted")
    logger.info("Overall metrics for the workspace (weighted):")
    logger.info(overall_metrics)
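
A usage sketch (values are illustrative): passing blindset_name=None falls back to '<topic>_blindset.csv' inside config.data_dir.

run_blindset(topic="billing", results_type="all", conf_matrix=True, blindset_name=None)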