def _load_dataset_without_responses(corpus_name, token_to_index):
    tokenized_lines = get_tokenized_test_lines(corpus_name, set(token_to_index.keys()))
    context_tokens_ids = transform_contexts_to_token_ids(
        lines_to_context(tokenized_lines),
        token_to_index,
        INPUT_SEQUENCE_LENGTH,
        INPUT_CONTEXT_SIZE,
        max_contexts_num=len(tokenized_lines))
    return Dataset(x=context_tokens_ids, y=None, condition_ids=None)
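# The Dataset container returned above is assumed here to be a plain record of
# model inputs (contexts, target responses, condition ids). A minimal stand-in
# could look like the sketch below; this is an assumption for illustration, not
# the project's actual definition.
from collections import namedtuple

Dataset = namedtuple('Dataset', ['x', 'y', 'condition_ids'])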
def _save_test_results(test_dataset, predictions_filename, nn_model, prediction_modes, **kwargs):
    test_dataset_ids = transform_contexts_to_token_ids(
        list(lines_to_context(test_dataset)), nn_model.token_to_index, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE)

    calculate_and_log_val_metrics(
        nn_model,
        load_context_sensitive_val(nn_model.token_to_index, nn_model.condition_to_index),
        load_context_free_val(nn_model.token_to_index))

    log_predictions(predictions_filename, test_dataset_ids, nn_model, prediction_modes, **kwargs)
def _save_test_results(test_dataset, predictions_filename, nn_model, prediction_mode, **kwargs):
    test_dataset_ids = transform_contexts_to_token_ids(
        list(lines_to_context(test_dataset)), nn_model.token_to_index, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE)

    calculate_and_log_val_metrics(
        nn_model,
        load_context_sensitive_val(nn_model.token_to_index, nn_model.condition_to_index),
        load_context_free_val(nn_model.token_to_index))

    log_predictions(
        predictions_filename,
        test_dataset_ids,
        nn_model,
        mode=prediction_mode,
        candidates_num=LOG_CANDIDATES_NUM,
        **kwargs)
def _get_context_to_weighted_responses(nn_model, testset, all_utterances):
    token_to_index = nn_model.token_to_index
    all_utterances_ids = transform_lines_to_token_ids(
        map(get_tokens_sequence, all_utterances), token_to_index, OUTPUT_SEQUENCE_LENGTH, add_start_end=True)

    context_to_weighted_responses = {}
    for context in testset:
        context_tokenized = get_tokens_sequence(context)
        repeated_context_ids = transform_contexts_to_token_ids(
            [[context_tokenized]] * len(all_utterances), token_to_index, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE)

        scores = get_sequence_score(nn_model, repeated_context_ids, all_utterances_ids)
        context_to_weighted_responses[context] = dict(zip(all_utterances, scores))

    return context_to_weighted_responses
def get_response(dialog_context, emotion):
    """
    :param dialog_context: list of dialog utterances
    :param emotion: emotion to condition the response on
    :return: dialog response conditioned on the input emotion
    """
    tokenized_dialog_context = list(map(get_tokens_sequence, dialog_context))
    tokenized_dialog_contexts = [tokenized_dialog_context]
    context_tokens_ids = transform_contexts_to_token_ids(
        tokenized_dialog_contexts, _cakechat_model.token_to_index, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE)

    condition_ids = transform_conditions_to_ids([emotion], _cakechat_model.condition_to_index, n_dialogs=1)

    if PREDICTION_MODE == PREDICTION_MODES.sampling:
        # Use the faster sampling path here to keep response latency low.
        return _get_non_offensive_response_using_fast_sampling(context_tokens_ids, condition_ids)
    else:
        return _get_non_offensive_response(context_tokens_ids, condition_ids)
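# Minimal usage sketch for get_response(). The utterances and the 'joy' emotion
# label are illustrative assumptions; the available condition values depend on
# the model's condition_to_index mapping.
dialog_context = [
    'hi, how are you doing?',
    'pretty good, just got back from a trip',
]
response = get_response(dialog_context, emotion='joy')
print(response)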
def _get_context_to_weighted_responses(nn_model, testset, all_utterances):
    token_to_index = nn_model.token_to_index
    all_utterances_ids = transform_lines_to_token_ids(
        list(map(get_tokens_sequence, all_utterances)), token_to_index, OUTPUT_SEQUENCE_LENGTH, add_start_end=True)

    context_to_weighted_responses = {}
    for context in testset:
        context_tokenized = get_tokens_sequence(context)
        repeated_context_ids = transform_contexts_to_token_ids(
            [[context_tokenized]] * len(all_utterances), token_to_index, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE)

        scores = get_sequence_score(nn_model, repeated_context_ids, all_utterances_ids)
        context_to_weighted_responses[context] = dict(zip(all_utterances, scores))

    return context_to_weighted_responses
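# Hedged usage sketch for _get_context_to_weighted_responses(): it returns a
# context -> {utterance: score} mapping, so the top-ranked response per context
# can be picked with max(). The nn_model, testset and all_utterances variables
# are assumed to be prepared elsewhere.
context_to_weighted_responses = _get_context_to_weighted_responses(nn_model, testset, all_utterances)
for context, weighted_responses in context_to_weighted_responses.items():
    best_response = max(weighted_responses, key=weighted_responses.get)
    print('{}\t{}'.format(context, best_response))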
def transform_lines_to_contexts_token_ids(tokenized_lines, nn_model):
    return transform_contexts_to_token_ids(
        list(lines_to_context(tokenized_lines)), nn_model.token_to_index, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE)
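# Rough sketch of the shape contract these call sites appear to rely on (an
# assumption inferred from the arguments above, not the library's documented
# behavior): each context is clipped/padded to INPUT_CONTEXT_SIZE utterances
# of INPUT_SEQUENCE_LENGTH token ids each.
contexts_token_ids = transform_lines_to_contexts_token_ids(tokenized_lines, nn_model)
# expected shape, roughly: (number_of_contexts, INPUT_CONTEXT_SIZE, INPUT_SEQUENCE_LENGTH)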