def generate_folds(n, td):
    """Generates n cross validation folds for training data td.

    :param n: number of folds
    :param td: TrainingData whose intent examples are split, stratified
               by intent label
    :yields: one (train TrainingData, test TrainingData) tuple per fold;
             entity synonyms and regex features are carried over unchanged
    """
    from sklearn.model_selection import StratifiedKFold

    # Pin random_state so folds are reproducible and consistent with the
    # run_cv_evaluation variants in this module, which use random_state=11.
    skf = StratifiedKFold(n_splits=n, shuffle=True, random_state=11)
    x = td.intent_examples
    # Stratification labels: the intent of each example.
    y = [example.get("intent") for example in x]
    for i_fold, (train_index, test_index) in enumerate(skf.split(x, y)):
        logger.debug("Fold: {}".format(i_fold))
        train = [x[i] for i in train_index]
        test = [x[i] for i in test_index]
        yield (TrainingData(training_examples=train,
                            entity_synonyms=td.entity_synonyms,
                            regex_features=td.regex_features),
               TrainingData(training_examples=test,
                            entity_synonyms=td.entity_synonyms,
                            regex_features=td.regex_features))
def run_cv_evaluation(data, n_folds, nlu_config):
    # type: (List[rasa_nlu.training_data.Message], int, RasaNLUConfig) -> Dict[Text, List[float]]
    """Stratified cross validation on data

    :param data: list of rasa_nlu.training_data.Message objects
    :param n_folds: integer, number of cv folds
    :param nlu_config: nlu config file
    :return: dictionary with key, list structure, where each entry in list
              corresponds to the relevant result for one fold
    """
    from sklearn import metrics
    from sklearn.model_selection import StratifiedKFold
    from collections import defaultdict

    trainer = Trainer(nlu_config)
    results = defaultdict(list)

    # Stratification labels: the intent of each example.
    y_true = [e.get("intent") for e in data]

    skf = StratifiedKFold(n_splits=n_folds, random_state=11, shuffle=True)
    counter = 1
    logger.info("Evaluation started")
    for train_index, test_index in skf.split(data, y_true):
        train = [data[i] for i in train_index]
        test = [data[i] for i in test_index]

        logger.debug("Fold: {}".format(counter))
        logger.debug("Training ...")
        trainer.train(TrainingData(training_examples=train))
        model_directory = trainer.persist(
                "projects/")  # Returns the directory the model is stored in

        logger.debug("Evaluation ...")
        interpreter = Interpreter.load(model_directory, nlu_config)

        test_y = [e.get("intent") for e in test]

        # Predicted intent name per test example; None when the parser
        # returns no intent.
        preds = []
        for e in test:
            res = interpreter.parse(e.text)
            if res.get('intent'):
                preds.append(res['intent'].get('name'))
            else:
                preds.append(None)

        # compute fold metrics
        results["Accuracy"].append(metrics.accuracy_score(test_y, preds))
        results["F1-score"].append(
                metrics.f1_score(test_y, preds, average='weighted'))
        # BUG FIX: precision was previously *assigned* each fold
        # (results["Precision"] = ...), overwriting prior folds and breaking
        # the documented list-per-fold structure; append like the others.
        results["Precision"].append(
                metrics.precision_score(test_y, preds, average='weighted'))

        # increase fold counter
        counter += 1

    return dict(results)
def drop_intents_below_freq(td: TrainingData, cutoff: int = 5):
    """Remove intent groups with less than cutoff instances."""
    logger.debug(
        "Raw data intent examples: {}".format(len(td.intent_examples)))
    retained = []
    for example in td.intent_examples:
        # Keep the example only when its intent is frequent enough.
        if td.examples_per_intent[example.get("intent")] >= cutoff:
            retained.append(example)
    return TrainingData(retained, td.entity_synonyms, td.regex_features)
def run_cv_evaluation(data, n_folds, nlu_config):
    # type: (List[rasa_nlu.training_data.Message], int, RasaNLUConfig) -> Dict[Text, List[float]]
    """Stratified cross validation on data

    :param data: list of rasa_nlu.training_data.Message objects
    :param n_folds: integer, number of cv folds
    :param nlu_config: nlu config file
    :return: namedtuple Results(train, test); each field is a dict mapping
             metric name to the list of per-fold results
    """
    from sklearn.model_selection import StratifiedKFold
    # BUG FIX: namedtuple is used below but was not imported here, unlike the
    # function's other collections dependency (defaultdict).
    from collections import defaultdict, namedtuple

    trainer = Trainer(nlu_config)
    train_results = defaultdict(list)
    test_results = defaultdict(list)

    # Stratification labels: the intent of each example.
    y_true = [e.get("intent") for e in data]

    skf = StratifiedKFold(n_splits=n_folds, random_state=11, shuffle=True)
    counter = 1
    logger.info("Evaluation started")
    for train_index, test_index in skf.split(data, y_true):
        train = [data[i] for i in train_index]
        test = [data[i] for i in test_index]

        logger.debug("Fold: {}".format(counter))
        logger.debug("Training ...")
        trainer.train(TrainingData(training_examples=train))
        model_directory = trainer.persist(
                "projects/")  # Returns the directory the model is stored in

        logger.debug("Evaluation ...")
        interpreter = Interpreter.load(model_directory, nlu_config)

        # calculate train accuracy
        compute_metrics(interpreter, train, train_results)
        # calculate test accuracy
        compute_metrics(interpreter, test, test_results)

        # increase fold counter
        counter += 1

    Results = namedtuple('Results', 'train test')
    results = Results(dict(train_results), dict(test_results))
    return results
def run_cv_evaluation(td, n_folds, nlu_config):
    # type: (TrainingData, int, RasaNLUConfig) -> CVEvaluationResult
    """Stratified cross validation on data

    :param td: Training Data
    :param n_folds: integer, number of cv folds
    :param nlu_config: nlu config file
    :return: CVEvaluationResult(train, test); each field is a dict with key,
             list structure, where each entry in list corresponds to the
             relevant result for one fold
    """
    # NOTE: removed unused local import `from sklearn import metrics`.
    from collections import defaultdict
    import tempfile

    trainer = Trainer(nlu_config)
    train_results = defaultdict(list)
    test_results = defaultdict(list)

    # Each fold's model is persisted into a throwaway directory and removed
    # again once its metrics have been computed.
    tmp_dir = tempfile.mkdtemp()

    for train, test in generate_folds(n_folds, td):
        # BUG FIX: generate_folds already yields TrainingData objects (with
        # entity synonyms and regex features attached); the old code wrapped
        # that TrainingData in another TrainingData via training_examples=,
        # passing the wrong type to the trainer. Train on it directly.
        trainer.train(train)
        model_dir = trainer.persist(tmp_dir)
        interpreter = Interpreter.load(model_dir, nlu_config)

        # calculate train accuracy
        compute_metrics(interpreter, train, train_results)
        # calculate test accuracy
        compute_metrics(interpreter, test, test_results)

        # Delete the persisted fold model before the next iteration.
        utils.remove_model(model_dir)

    os.rmdir(os.path.join(tmp_dir, "default"))
    os.rmdir(tmp_dir)

    return CVEvaluationResult(dict(train_results), dict(test_results))