def create_folds_mc(self, sample_size):
    """
    Create folds for a Monte-Carlo test. Each fold is a new and independent
    random sample, stratified with respect to intent size.
    :param sample_size: proportion of the training set held out as the test set.
    """
    folds = []
    samples_per_intent = np.ceil(
        self.training_df.groupby('intent').count() * sample_size)
    intents = self.training_df['intent'].unique().tolist()

    for i in range(0, self.n_folds):
        train_inds = []
        test_inds = []

        for intent in intents:
            questions_intent = self.training_df[self.training_df['intent'] == intent]
            no_tosample = int(samples_per_intent['utterance'][intent])
            test_inds += questions_intent.sample(n=no_tosample).index.tolist()
            train_inds += list(set(questions_intent.index.tolist()) - set(test_inds))

        fold = {"train": train_inds, "test": test_inds}
        folds.append(fold)
        logger.debug("fold num {}: train set: {}, test set: {}".format(
            i + 1, len(fold["train"]), len(fold["test"])))

    return folds
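# Hedged usage sketch (not part of the original module): demonstrates the
# per-intent stratified hold-out that create_folds_mc performs, on a toy
# DataFrame. Only the 'utterance'/'intent' column names are taken from the
# code above; the data and the helper name are illustrative.
def _example_mc_split_sketch():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'utterance': [f"question {i}" for i in range(12)],
        'intent': ['billing'] * 6 + ['payments'] * 6,
    })
    sample_size = 0.25  # hold out 25% of each intent as the test set
    test_inds = []
    for intent, group in df.groupby('intent'):
        n = int(np.ceil(len(group) * sample_size))
        test_inds += group.sample(n=n).index.tolist()
    train_inds = sorted(set(df.index) - set(test_inds))
    return {"train": train_inds, "test": test_inds}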
def check_skills_exist(skill_list):
    """
    Check that each skill appears in the Credentials file and that a blindset
    exists for it.
    """
    bs_notexist = []
    cr_notexist = []
    topics_in_creds = Credentials.workspace_id[active_adoption].keys()
    get_bs_path = lambda s: os.path.join(config.data_dir, f"{s}_blindset.csv")

    for skill in skill_list:
        # record skills whose blindset file doesn't exist
        bs_path = get_bs_path(skill)
        if not os.path.exists(bs_path):
            bs_notexist.append(skill)
        # record skills that have no credentials entry
        if skill not in topics_in_creds:
            cr_notexist.append(skill)

    # `and`, not bitwise `&`: `&` binds tighter than the comparisons, so the
    # original condition did not test what it appeared to
    if len(bs_notexist) > 0 and len(cr_notexist) > 0:
        raise ValueError(
            "Blindsets don't exist for topics {}, and credentials don't exist for topics {}. "
            "Please add these and try again.".format(bs_notexist, cr_notexist))
    elif len(bs_notexist) > 0:
        raise ValueError(
            "Blindsets don't exist for topics {}. Please add these and try again."
            .format(bs_notexist))
    elif len(cr_notexist) > 0:
        raise ValueError(
            "Credentials don't exist for topics {}. Please add these and try again."
            .format(cr_notexist))
    else:
        logger.debug("All skills exist in credentials and blindsets")
def create_folds_kfold(self):
    """
    Create the folds for the k-fold test using stratified k-fold division.
    :param self.training_df: dataframe containing the whole ground truth of the workspace
    :return folds: a list of folds, each containing the train and test set indexes.
    """
    folds = []
    skf = StratifiedKFold(n_splits=self.n_folds, shuffle=True, random_state=2)

    for i, (train_index, test_index) in enumerate(
            skf.split(self.training_df['utterance'], self.training_df['intent'])):
        fold = {"train": train_index, "test": test_index}
        folds.append(fold)
        logger.debug("fold num {}: train set: {}, test set: {}".format(
            i + 1, len(fold["train"]), len(fold["test"])))

    return folds
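# Hedged sketch (not from the original module): shows how scikit-learn's
# StratifiedKFold, as used above, yields positional train/test indexes per
# fold while preserving the intent proportions of the full dataframe. Only
# the 'utterance'/'intent' column names come from the code above; the data
# and the helper name are illustrative.
def _example_stratified_kfold_sketch():
    import pandas as pd
    from sklearn.model_selection import StratifiedKFold

    df = pd.DataFrame({
        'utterance': [f"question {i}" for i in range(12)],
        'intent': ['billing'] * 6 + ['payments'] * 6,
    })
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=2)
    return [{"train": train_index, "test": test_index}
            for train_index, test_index in skf.split(df['utterance'], df['intent'])]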
def check_workspaces_status(self):
    """
    Check the status of the workspaces just created. The k-fold test can only
    start once all workspaces are 'Available' rather than still training.
    Returns an availability flag to be used inside a while loop.
    """
    available_count = 0
    workspaces = self.workspaces

    for i in range(len(workspaces)):
        response = self.assistant.get_workspace(
            workspace_id=workspaces[i]).get_result()
        status = response['status']
        # the status can be: Available, Unavailable, Training, Non Existent, Failed
        if status == 'Available':
            available_count += 1
            logger.debug("Workspace {} available".format(i + 1))

    return available_count == len(workspaces)
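# Hedged usage sketch: check_workspaces_status is designed to be polled until
# all fold workspaces have finished training. `kf` is assumed to be an
# instance of the surrounding class; the 20-second interval mirrors the
# polling loop used in run_kfold below.
def _example_poll_until_available(kf):
    import time
    while not kf.check_workspaces_status():
        logger.info("Workspaces still training, waiting..")
        time.sleep(20)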
def __init__(self, **kwargs):
    if 'url' in kwargs:
        self.url = kwargs['url']
    else:
        raise ValueError("URL needs to be provided.")

    if 'threshold' in kwargs:
        self.threshold = kwargs['threshold']
    else:
        self.threshold = False
        logger.debug(
            'No threshold provided. Provide one when running the blindset test.')

    # make sure all credential variables are present
    if ('apikey' not in kwargs) and (('username' not in kwargs) or
                                     ('password' not in kwargs)):
        raise ValueError(
            "One of username & password, or apikey must be present.")

    if 'apikey' in kwargs:
        self.apikey = kwargs['apikey']
        self.auth_type = 'apikey'

    if ('username' in kwargs) and ('password' in kwargs):
        self.username = kwargs['username']
        self.password = kwargs['password']
        self.auth_type = 'password'

    if 'version' in kwargs:
        self.conversation_version = kwargs['version']
    else:
        self.conversation_version = '2018-07-10'

    # authenticate
    self.authenticate_watson()
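# Hedged usage sketch: the constructor above accepts either an IBM Cloud API
# key or a username/password pair, plus the service URL. Assuming this is the
# blindset class's constructor (as run_blindset below suggests); the
# credential values here are placeholders, and authenticate_watson() will
# fail without real ones.
def _example_construct_blindset():
    return blindset(apikey="<your-apikey>",
                    url="https://gateway.watsonplatform.net/assistant/api",
                    threshold=0.4,
                    version='2018-07-10')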
def main(topic_list, conf_matrix, save_master_data):
    skill_list = process_list_argument(topic_list, val_type=str)
    master_skill_id = None  # so the exception handler below works
    master_thresh = Credentials.calculate_workspace_thresh("master")

    try:
        id_dict = {
            skill: Credentials.workspace_id[active_adoption][skill]
            for skill in skill_list
        }
        timestr = generate_timestamp()  # for use in all filenames

        # authenticate
        if "apikey" in instance_creds:
            logger.debug("Authenticating (apikey)")
            bs = blindset.blindset(
                apikey=instance_creds["apikey"],
                url=instance_creds["url"],
                version=conversation_version,
            )
        elif "password" in instance_creds:
            logger.debug("Authenticating (username/password)")
            bs = blindset.blindset(
                username=instance_creds["username"],
                password=instance_creds["password"],
                url=instance_creds["url"],
                version=conversation_version,
            )

        # check skills exist
        check_skills_exist(skill_list)

        # import blindsets and generate master
        logger.info("Importing all blindsets and combining into master")
        blind_dict = dict()
        for skill in skill_list:
            bs_path = os.path.join(config.data_dir, f"{skill}_blindset.csv")
            blind_dict[skill] = bs.import_blindset(bs_path)

        master_blind_allcols = pd.concat(
            [v.assign(topic=k) for k, v in blind_dict.items()],
            axis=0,
            ignore_index=True,
            sort=False,
        )
        master_blind = master_blind_allcols[["utterance", "topic"]].rename(
            columns={"topic": "expected intent"})

        # generate master from topic training and push to WA
        logger.info("Getting training data from WA")
        train_dict = dict()
        for skill in skill_list:
            train_dict[skill] = wa_utils.get_training_data(
                bs.assistant, id_dict[skill])

        logger.info("Creating temporary master skill")
        master_train = pd.concat(
            [
                v.drop(columns=["intent"]).assign(intent=k)
                for k, v in train_dict.items()
            ],
            axis=0,
            ignore_index=True,
            sort=False,
        )
        master_skill_id = wa_utils.create_workspace_from_df(
            bs.assistant,
            name="master",
            train_df=master_train,
            description="generated by intent_training_tools",
        )

        # run blindset on master
        logger.info("Running blindset on master..")
        results_master = bs.run_blind_test(master_blind,
                                           master_skill_id,
                                           threshold=master_thresh)
        results_master["routing"] = results_master["intent1"]
        results_master.loc[results_master["confidence1"] < master_thresh,
                           "routing"] = "anything_else"

        # create blindsets for topics based on master results
        newblind_dict = dict()
        for skill in skill_list:
            # the blindset for each skill is made up of utterances that landed
            # in that skill for the master
            blind_utterances = results_master.loc[
                (results_master["intent1"] == skill)
                & (results_master["confidence1"] >= master_thresh),
                "original_text"].tolist()
            newblind = master_blind_allcols[
                master_blind_allcols["utterance"].isin(blind_utterances)].copy()
            newblind.loc[newblind["topic"] != skill,
                         "expected intent"] = "anything_else"
            newblind_dict[skill] = newblind[[
                "utterance", "expected intent"
            ]].reset_index(drop=True)

        # run blindsets on topics
        logger.info("Running blindset on topic skills..")
        results_dict = dict()
        for skill in skill_list:
            results_dict[skill] = bs.run_blind_test(
                newblind_dict[skill],
                id_dict[skill],
                threshold=Credentials.calculate_workspace_thresh(skill),
            )

        # plot confusion matrices
        if conf_matrix:
            from conversation_test.confusionmatrix import ConfusionMatrix
            conf_output_path = lambda s: os.path.join(
                config.output_folder, f"{s}_multi_confmat_{timestr}.png")
            # master
            cfn = ConfusionMatrix(workspace_thresh=master_thresh)
            cfn.create(results_master, fig_path=conf_output_path("master"))
            # topics
            for skill in skill_list:
                cfn = ConfusionMatrix(
                    workspace_thresh=Credentials.calculate_workspace_thresh(skill))
                cfn.create(results_dict[skill], fig_path=conf_output_path(skill))
            logger.info("Confusion matrices saved to results folder")

        # calculate metrics
        # master
        met = Metrics(workspace_thresh=master_thresh)
        metrics_master, _ = met.get_all_metrics(results_master,
                                                detailed_results=True)
        # topics
        metrics_dict = dict()
        res_with_conf_dict = dict()
        for skill in skill_list:
            met = Metrics(
                workspace_thresh=Credentials.calculate_workspace_thresh(skill))
            metrics_dict[skill], res_with_conf_dict[skill] = met.get_all_metrics(
                results_dict[skill], detailed_results=True)

        # topics - create an overall view as if it's a single skill
        topics_res_with_conf = pd.concat(
            [v for k, v in res_with_conf_dict.items()],
            ignore_index=True,
            sort=False)
        results_master.loc[results_master["routing"] == "anything_else",
                           'confusion'] = 'FN'
        # pd.concat rather than the deprecated DataFrame.append
        topics_res_with_conf = pd.concat(
            [topics_res_with_conf, results_master],
            ignore_index=True,
            sort=False,
        )
        metrics_overall = met.calculate_metrics_per_intent(
            topics_res_with_conf, detailed_results=True)
        metrics_overall.loc[metrics_overall.index.isin(skill_list),
                            'threshold'] = master_thresh
        metrics_overall = metrics_overall.rename(
            index={s: s + ' - anything else' for s in skill_list})

        # export results
        for skill in skill_list:
            results_dict[skill].to_csv(
                os.path.join(config.output_folder,
                             f"{skill}_multi_results_{timestr}.csv"),
                index=None,
            )
            metrics_dict[skill].to_csv(
                os.path.join(config.output_folder,
                             f"{skill}_multi_metrics_{timestr}.csv"))
        results_master.to_csv(
            os.path.join(config.output_folder,
                         f"master_multi_results_{timestr}.csv"),
            index=None,
        )
        metrics_master.to_csv(
            os.path.join(config.output_folder,
                         f"master_multi_metrics_{timestr}.csv"))
        metrics_overall.to_csv(
            os.path.join(config.output_folder,
                         f"overall_multi_metrics_{timestr}.csv"))
        logger.info("Results and metrics saved to output folder")

        if save_master_data:
            # export master blindset with both intent and topic labels to CSV
            master_blind_allcols.to_csv(
                os.path.join(config.data_dir,
                             f"master_blindset_{timestr}.csv"),
                index=None,
            )
            # export master training to CSV
            master_train.to_csv(
                os.path.join(config.data_dir,
                             f"master_training_{timestr}.csv"),
                header=None,
                index=None,
            )
            logger.info(
                "Master blindset and training have also been saved to the data folder"
            )

        # delete master skill
        logger.info("Deleting temporary master skill")
        wa_utils.delete_workspace(bs.assistant, master_skill_id)

    except Exception as e:
        if master_skill_id is not None:
            # make sure the master skill is deleted regardless of failure
            logger.info("Deleting temporary master skill before exit")
            wa_utils.delete_workspace(bs.assistant, master_skill_id)
        raise e
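# Hedged usage sketch: process_list_argument suggests topic_list arrives as a
# comma-separated string (e.g. from a CLI option). The topic names here are
# placeholders and must exist in Credentials.py with matching blindset files.
def _example_run_multi_skill_test():
    main(topic_list='billing,payments', conf_matrix=True,
         save_master_data=False)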
def run_kfold(topic, no_folds, results_type, conf_matrix):
    """
    Runs a k-fold test using credentials in ../Credentials.py
    """
    # get credentials
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]
    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    output_loc_results = os.path.join(
        export_folder, "{}_kfold_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_kfold_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_kfold_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        kf = kfoldtest(n_folds=no_folds,
                       apikey=instance_creds['apikey'],
                       url=instance_creds['url'],
                       threshold=workspace_thresh,
                       version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        kf = kfoldtest(n_folds=no_folds,
                       username=instance_creds['username'],
                       password=instance_creds['password'],
                       url=instance_creds['url'],
                       threshold=workspace_thresh,
                       version=conversation_version)

    # get training df from Watson + check there are sufficient workspaces to run the test
    train_df = kf.intent_df_from_watson(workspace_id)
    kf.check_sufficient_workspaces()

    # create folds in WA if the above succeeds
    folds = kf.create_folds(method='kfold')
    kf.create_kfold_WA(folds)

    # poll until all fold workspaces have finished training
    available_flag = False
    while not available_flag:
        logger.info("Checking workspaces..")
        available_flag = kf.check_workspaces_status()
        time.sleep(20)

    # run kfold test
    try:
        results = kf.run_kfold_test(folds)

        if (results_type == 'raw') or (results_type == 'all'):
            results.to_csv(output_loc_results)

        classification_report = kf.create_classification_report(results)

        if (results_type == 'metrics') or (results_type == 'all'):
            metrics = Metrics(workspace_thresh)
            metric_df = metrics.get_all_metrics_CV(results,
                                                   fold_col='fold',
                                                   detailed_results=False)
            metric_df.to_csv(output_loc_metrics)

        # confusion matrix
        if conf_matrix:
            from confusionmatrix import ConfusionMatrix
            cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
            cfn.create(results, fig_path=output_loc_confmat)
            logger.info("Confusion matrix saved to {}".format(output_loc_confmat))

    finally:
        # regardless of what happens above, delete the temporary workspaces before exiting
        kf.delete_kfold_workspaces()
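# Hedged usage sketch: how run_kfold might be invoked, e.g. from a CLI
# wrapper. 'billing' is a placeholder topic that must exist in
# Credentials.py, and the instance needs enough spare workspace slots for
# the folds.
def _example_run_kfold():
    run_kfold(topic='billing', no_folds=5, results_type='all',
              conf_matrix=True)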
def run_blindset(topic, results_type, conf_matrix, blindset_name):
    """
    Runs a blindset test using credentials in ../Credentials.py
    """
    # get credentials
    import Credentials
    active_adoption = Credentials.active_adoption
    instance_creds = Credentials.ctx[active_adoption]
    workspace_id = Credentials.workspace_id[active_adoption][topic]
    workspace_thresh = Credentials.calculate_workspace_thresh(topic)
    conversation_version = Credentials.conversation_version

    # import + export folders
    import config
    import time
    data_folder = config.data_dir
    export_folder = config.output_folder
    timestr = time.strftime("%Y%m%d-%H%M")

    blindset_name = blindset_name or topic + "_blindset.csv"
    output_loc_results = os.path.join(
        export_folder, "{}_results_raw_{}.csv".format(topic, timestr))
    output_loc_metrics = os.path.join(
        export_folder, "{}_results_metrics_{}.csv".format(topic, timestr))
    output_loc_confmat = os.path.join(
        export_folder, "{}_confmat_{}.png".format(topic, timestr))

    # authenticate
    if 'apikey' in instance_creds:
        logger.debug("Authenticating (apikey)")
        bs = blindset(apikey=instance_creds['apikey'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)
    elif 'password' in instance_creds:
        logger.debug("Authenticating (username/password)")
        bs = blindset(username=instance_creds['username'],
                      password=instance_creds['password'],
                      url=instance_creds['url'],
                      threshold=workspace_thresh,
                      version=conversation_version)

    # run test
    blindset_df = bs.import_blindset(os.path.join(data_folder, blindset_name))
    # TODO: check blindset df
    results = bs.run_blind_test(blindset_df, workspace_id)

    # exports + metrics
    if (results_type == 'raw') or (results_type == 'all'):
        cols_export = [
            col for col in results.columns.values if col != 'intent_correct'
        ]
        results[cols_export].to_csv(output_loc_results, encoding='utf-8')
        logger.info("Raw results exported to {}".format(output_loc_results))

    if (results_type == 'metrics') or (results_type == 'all'):
        met = Metrics(workspace_thresh)
        metric_df, _ = met.get_all_metrics(results, detailed_results=True)
        metric_df.to_csv(output_loc_metrics, encoding='utf-8')
        logger.info("Metrics per intent exported to {}".format(output_loc_metrics))

    # confusion matrix
    if conf_matrix:
        from confusionmatrix import ConfusionMatrix
        cfn = ConfusionMatrix(workspace_thresh=workspace_thresh)
        cfn.create(results, fig_path=output_loc_confmat)
        logger.info("Confusion matrix saved to {}".format(output_loc_confmat))

    # print high-level metrics
    overall_metrics = bs.calculate_overall_metrics(results, av_method="weighted")
    logger.info("Overall metrics for the workspace (weighted):")
    logger.info(overall_metrics)
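# Hedged usage sketch: invoking run_blindset with a placeholder topic. With
# blindset_name=None, the test file defaults to '<topic>_blindset.csv' in
# the data folder, per the fallback above.
def _example_run_blindset():
    run_blindset(topic='billing', results_type='all', conf_matrix=True,
                 blindset_name=None)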