def parse_args():
    """
    Parse the command-line arguments of the training script.

    The only option is the -r/--reverse flag, stored as a boolean
    (``store_true``) on the returned namespace.

    :return: namespace produced by ``ArgumentParser.parse_args()``
    """
    parser = argparse.ArgumentParser()

    # The help text tells the user where the reverse model will be stored.
    reverse_help = ('Pass this flag if you want to train reverse model. '
                    'The model will be stored at {}').format(get_model_full_path(is_reverse_model=True))
    parser.add_argument('-r', '--reverse', action='store_true', help=reverse_help)

    return parser.parse_args()
def _log_sample_answers(x_test, nn_model, mode, is_reverse_model):
    """
    Predict responses for the given test contexts and log them next to their questions.

    :param x_test: contexts to answer; ``x_test.shape[0]`` is logged as the number of samples
    :param nn_model: model used for prediction; its ``index_to_token`` is used to decode the contexts
    :param mode: prediction mode forwarded to ``get_nn_responses()``
    :param is_reverse_model: forwarded to ``get_model_full_path()`` to log the model path
    """
    model_path = get_model_full_path(is_reverse_model)
    _logger.info('Model: {}'.format(model_path))

    start_message = 'Start predicting responses of length {out_len} for {n_samples} samples with mode {mode}'.format(
        out_len=MAX_PREDICTIONS_LENGTH, n_samples=x_test.shape[0], mode=mode)
    _logger.info(start_message)

    questions = transform_context_token_ids_to_sentences(x_test, nn_model.index_to_token)
    responses = get_nn_responses(x_test, nn_model, mode, output_candidates_num=LOG_CANDIDATES_NUM)
    _logger.info('Finished predicting! Logging...')

    # x_test is zipped in only to bound the iteration exactly as before; its items are not used.
    for sample_idx, (_, question_text) in enumerate(zip(x_test, questions)):
        laconic_logger.info('')  # empty line between samples for better readability
        # Candidates are numbered starting from 1 in the log.
        for candidate_num, candidate in enumerate(responses[sample_idx], 1):
            log_line = '%-35s\t --#=%02d--> \t%s' % (question_text, candidate_num, candidate)
            laconic_logger.info(log_line)
def load_model():
    """
    Build the model from the default index files and the default model path.

    :return: the model returned by ``get_nn_model()``
    :raises FileNotFoundException: if ``get_nn_model()`` reports that the saved model does not exist
    """
    tokens_path = get_index_to_token_path(BASE_CORPUS_NAME)
    conditions_path = get_index_to_condition_path(BASE_CORPUS_NAME)
    weights_path = get_model_full_path()

    token_index = load_index_to_item(tokens_path)
    condition_index = load_index_to_item(conditions_path)

    model, is_found = get_nn_model(token_index, condition_index, nn_model_path=weights_path)
    if is_found:
        return model

    raise FileNotFoundException('Couldn\'t find model:\n"{}". \nExiting...'.format(weights_path))
def _update_saved_nn_model(nn_model, cur_perplexities, best_perplexities, is_reverse_model=False):
    """
    Save the model and keep track of the best perplexities seen so far.

    If every current perplexity is strictly lower than the corresponding best one, the model is saved
    under the model path with a perplexity suffix and the previously suffixed file is deleted (when the
    suffix actually changed). Otherwise the model is saved under the plain model path.

    :param nn_model: model to save
    :param cur_perplexities: pair of current perplexities, formatted as (free, sensitive) in the suffix
    :param best_perplexities: pair of the best perplexities so far, in the same order
    :param is_reverse_model: forwarded to ``get_model_full_path()``
    :return: ``cur_perplexities`` if they improved on all positions, ``best_perplexities`` otherwise
    """
    base_path = get_model_full_path(is_reverse_model)

    improved = all(cur < best for cur, best in zip(cur_perplexities, best_perplexities))
    if not improved:
        _save_model(nn_model, base_path)
        return best_perplexities

    suffix_template = '_pp_free{0:.2f}_sensitive{1:.2f}'
    previous_suffix = suffix_template.format(*best_perplexities)
    current_suffix = suffix_template.format(*cur_perplexities)

    _save_model(nn_model, base_path + current_suffix)
    # Suffixes are rounded to two decimals, so an improvement may map to the same file name;
    # in that case the file just written must not be deleted.
    if current_suffix != previous_suffix:
        _delete_model(base_path + previous_suffix)

    return cur_perplexities
def load_model(model_path=None, tokens_index_path=None, conditions_index_path=None):
    """
    Build the model from the given (or default) index files and model path.

    :param model_path: path to the saved model; ``get_model_full_path()`` is used when None
    :param tokens_index_path: path to the tokens index; the BASE_CORPUS_NAME default is used when None
    :param conditions_index_path: path to the conditions index; the BASE_CORPUS_NAME default is used when None
    :return: the model returned by ``get_nn_model()``
    :raises ValueError: if ``get_nn_model()`` reports that the saved model does not exist
    """
    # Only None triggers a default; other falsy values are passed through unchanged.
    model_path = get_model_full_path() if model_path is None else model_path
    tokens_index_path = (get_index_to_token_path(BASE_CORPUS_NAME)
                         if tokens_index_path is None else tokens_index_path)
    conditions_index_path = (get_index_to_condition_path(BASE_CORPUS_NAME)
                             if conditions_index_path is None else conditions_index_path)

    token_index = load_index_to_item(tokens_index_path)
    condition_index = load_index_to_item(conditions_index_path)

    model, is_found = get_nn_model(token_index, condition_index, nn_model_path=model_path)
    if is_found:
        return model

    raise ValueError('Couldn\'t find model: "{}".'.format(model_path))
def load_model():
    """
    Load the model stored at the default model path, using the default index files.

    :return: the model returned by ``get_nn_model()``
    :raises FileNotFoundException: if ``get_nn_model()`` reports that the saved model does not exist
    """
    # Tokens index first, conditions index second - the same order in which they are loaded below.
    index_paths = (
        get_index_to_token_path(BASE_CORPUS_NAME),
        get_index_to_condition_path(BASE_CORPUS_NAME),
    )
    saved_model_path = get_model_full_path()

    token_index, condition_index = [load_index_to_item(index_path) for index_path in index_paths]

    model, is_found = get_nn_model(token_index, condition_index, nn_model_path=saved_model_path)
    if not is_found:
        message = 'Couldn\'t find model:\n"{}". \nExiting...'.format(saved_model_path)
        raise FileNotFoundException(message)

    return model
def get_nn_model(index_to_token,
                 index_to_condition,
                 w2v_matrix=None,
                 resolver_factory=None,
                 nn_model_path=None,
                 is_reverse_model=False):
    """
    Create a CakeChatModel and load previously saved weights into it if the resolver finds them.

    :param index_to_token: tokens index; its length is logged as the output dimension
    :param index_to_condition: conditions index passed to the model constructor
    :param w2v_matrix: optional initial embedding matrix; cast to ``theano.config.floatX`` when given
    :param resolver_factory: optional callable that takes the model path and returns an object with
        a ``resolve()`` method; ``DummyFileResolver`` is used when it is not given
    :param nn_model_path: path to the saved weights; ``get_model_full_path(is_reverse_model)`` when empty
    :param is_reverse_model: only used to pick the default model path
    :return: tuple ``(model, model_exists)`` where ``model_exists`` is the result of ``resolver.resolve()``
    """
    input_dimension = WORD_EMBEDDING_DIMENSION + CONDITION_EMBEDDING_DIMENSION
    _logger.info('Initializing NN model with the following params:')
    _logger.info('NN input dimension: {} (token vector size)'.format(input_dimension))
    _logger.info('NN hidden dimension: {}'.format(HIDDEN_LAYER_DIMENSION))
    _logger.info('NN output dimension: {} (dict size)'.format(len(index_to_token)))

    init_embedding = None if w2v_matrix is None else w2v_matrix.astype(theano.config.floatX)
    model = CakeChatModel(index_to_token, index_to_condition, init_embedding=init_embedding)

    # Fall back to the default location when no explicit path was passed.
    weights_path = nn_model_path or get_model_full_path(is_reverse_model)

    make_resolver = resolver_factory or DummyFileResolver
    model_exists = make_resolver(weights_path).resolve()

    if not model_exists:
        _logger.info("Can't find previously calculated model, so will use a fresh one")
    else:
        _logger.info('Loading previously calculated weights from {}...'.format(weights_path))
        model.load_weights(weights_path)

    _logger.info('Model is built\n')
    model.print_layer_shapes()
    model.print_matrices_weights()
    _logger.info('Model path is {}'.format(weights_path))

    return model, model_exists
def train(is_reverse_model=False):
    """
    Check that the required data files exist, build the model and run the training.

    :param is_reverse_model: when True, the reverse model is trained: its path is used both for the
        saved-model lookup and for initializing the model, and the flag is forwarded to train_model()
    """
    processed_train_corpus_path = get_processed_corpus_path(TRAIN_CORPUS_NAME)
    processed_val_corpus_path = get_processed_corpus_path(CONTEXT_SENSITIVE_VAL_CORPUS_NAME)
    index_to_token_path = get_index_to_token_path(BASE_CORPUS_NAME)
    index_to_condition_path = get_index_to_condition_path(BASE_CORPUS_NAME)
    model_path = get_model_full_path(is_reverse_model)

    # check the existence of all necessary files before compiling the model
    # NOTE(review): index_to_condition_path is loaded below but is not part of this check - confirm
    # whether that is intentional.
    _look_for_saved_files(files_paths=[processed_train_corpus_path, processed_val_corpus_path, index_to_token_path])
    _look_for_saved_model(model_path)

    index_to_token = load_index_to_item(index_to_token_path)
    index_to_condition = load_index_to_item(index_to_condition_path)

    w2v_matrix = _get_w2v_embedding_matrix_by_corpus_path(processed_train_corpus_path, index_to_token)

    # get nn_model and train it
    # Pass the path explicitly: without it get_nn_model() falls back to the default (non-reverse)
    # model path, so a reverse-model run would be initialized from the wrong saved weights.
    nn_model, _ = get_nn_model(index_to_token, index_to_condition, w2v_matrix, nn_model_path=model_path)
    train_model(nn_model, is_reverse_model=is_reverse_model)
def train_model(nn_model, is_reverse_model=False):
    """
    Main function for training. Refactoring anticipated.

    Loads the train and validation sets, then runs EPOCHES_NUM epochs of batch training on nn_model.
    During training it logs per-batch progress, periodically logs sample answers, and periodically
    calculates metrics, saves validation results and updates the saved model.
    On KeyboardInterrupt or SystemExit the model and the validation results are saved with
    a '_final' suffix.

    :param nn_model: model to train; its token_to_index and condition_to_index attributes are read
        and its train() method is called for every batch
    :param is_reverse_model: when True, all datasets are passed through reverse_nn_input() and sampling
        is used as the validation prediction mode; the flag is also forwarded to the functions that
        log, save and locate the model
    """
    validation_prediction_mode = PREDICTION_MODES.sampling if is_reverse_model else PREDICTION_MODE_FOR_TESTS

    # NOTE: the local name `train` holds the training dataset inside this function.
    train = load_conditioned_train_set(nn_model.token_to_index, nn_model.condition_to_index)
    context_free_val = load_context_free_val(nn_model.token_to_index)
    context_sensitive_val = load_context_sensitive_val(nn_model.token_to_index, nn_model.condition_to_index)
    if is_reverse_model:
        service_tokens = ServiceTokensIDs(nn_model.token_to_index)
        train = reverse_nn_input(train, service_tokens)
        context_free_val = reverse_nn_input(context_free_val, service_tokens)
        context_sensitive_val = reverse_nn_input(context_sensitive_val, service_tokens)

    # Train subset of same size as a context-free val for metrics calculation
    train_subset = generate_subset(train, VAL_SUBSET_SIZE)

    # Context-sensitive val subset of same size as a context-free val for metrics calculation
    context_sensitive_val_subset = generate_subset(context_sensitive_val, VAL_SUBSET_SIZE)

    _logger.info('Finished preprocessing! Start training')

    # batch_id counts batches across all epochs; it is never reset.
    batch_id = 0
    avg_loss = 0
    total_training_time = 0
    best_val_perplexities = (float('inf'), float('inf'))
    # Batches per epoch, rounded up.
    # NOTE(review): this relies on `/` being integer division (Python 2 ints) - confirm.
    batches_num = (train.x.shape[0] - 1) / BATCH_SIZE + 1

    start_time = time.time()
    # Last validation metrics computed; stays None until the first validation pass completes.
    cur_val_metrics = None

    try:
        for epoches_counter in xrange(1, EPOCHES_NUM + 1):
            _logger.info('Starting epoch #%d; time = %0.2f s(training of it = %0.2f s)' %
                         (epoches_counter, time.time() - start_time, total_training_time))

            for train_batch in get_training_batch(
                    [train.x, train.y, train.condition_ids], BATCH_SIZE, random_permute=SHUFFLE_TRAINING_BATCHES):
                x_train_batch, y_train_batch, condition_ids_train_batch = train_batch

                batch_id += 1
                # Only the nn_model.train() call is counted as training time.
                prev_time = time.time()
                loss = nn_model.train(x_train_batch, y_train_batch, condition_ids_train_batch)

                cur_time = time.time()
                total_training_time += cur_time - prev_time
                total_time = cur_time - start_time
                # Decayed running average of the loss; the very first batch initializes it.
                avg_loss = LOG_LOSS_DECAY * avg_loss + (1 - LOG_LOSS_DECAY) * loss if batch_id > 1 else loss

                # batch_id is cumulative, so progress is measured against a single epoch's batch count.
                progress = 100 * float(batch_id) / batches_num
                # Despite the name, this is the average wall-clock time per batch.
                avr_time_per_sample = total_time / batch_id
                expected_time_per_epoch = avr_time_per_sample * batches_num

                # Per-batch progress line; times are converted from seconds to hours.
                _logger.info('batch %s / %s (%d%%) \t'
                             'loss: %.2f \t '
                             'time: epoch %.1f h | '
                             'total %0.1f h | '
                             'train %0.1f h (%.1f%%)' %
                             (batch_id, batches_num, progress, avg_loss, expected_time_per_epoch / 3600,
                              total_time / 3600, total_training_time / 3600,
                              100 * total_training_time / total_time))

                if batch_id % SCREEN_LOG_FREQUENCY_PER_BATCHES == 0:
                    _log_sample_answers(context_free_val.x[:SCREEN_LOG_NUM_TEST_LINES], nn_model,
                                        validation_prediction_mode, is_reverse_model)

                if batch_id % LOG_FREQUENCY_PER_BATCHES == 0:
                    _calc_and_save_train_metrics(nn_model, train_subset, avg_loss)

                    val_metrics = _calc_and_save_val_metrics(
                        nn_model,
                        context_sensitive_val_subset,
                        context_free_val,
                        prediction_mode=validation_prediction_mode)
                    _save_val_results(
                        nn_model,
                        context_free_val.x,
                        context_sensitive_val_subset.x,
                        val_metrics,
                        train_info=(start_time, batch_id, batches_num),
                        prediction_mode=validation_prediction_mode)
                    cur_val_metrics = val_metrics

                    # Perplexities are passed in (context-free, context-sensitive) order.
                    best_val_perplexities = \
                        _update_saved_nn_model(nn_model, (val_metrics['context_free_perplexity'],
                                                          val_metrics['context_sensitive_perplexity']),
                                               best_val_perplexities, is_reverse_model=is_reverse_model)
    except (KeyboardInterrupt, SystemExit):
        _logger.info('Training cycle is stopped manually')
        _save_model(nn_model, get_model_full_path(is_reverse_model) + '_final')
        # cur_val_metrics is still None here if training was stopped before the first validation pass.
        _save_val_results(
            nn_model,
            context_free_val.x,
            context_sensitive_val_subset.x,
            cur_val_metrics,
            train_info=(start_time, batch_id, batches_num),
            suffix='_final',
            prediction_mode=validation_prediction_mode)
def predict(model_path=None,
            tokens_index_path=None,
            conditions_index_path=None,
            default_predictions_path=None,
            reverse_model_weights=None,
            temperatures=None,
            prediction_mode=PREDICTION_MODE_FOR_TESTS):
    """
    Generate responses for the questions corpus and dump them to .tsv files.

    One file is written for each combination of the passed reverse_model_weights and temperatures.
    Logs a warning and returns None without predicting when is_non_empty_file() rejects the model
    file or the tokens index file.

    :param model_path: path to the saved model; get_model_full_path() is used when empty
    :param tokens_index_path: path to the tokens index; the BASE_CORPUS_NAME default is used when empty
    :param conditions_index_path: path to the conditions index; the BASE_CORPUS_NAME default is used when empty
    :param default_predictions_path: base path (without extension) of the output files; defaults to
        DATA_DIR/results/predictions_<model params string>
    :param reverse_model_weights: optional values, each passed to _save_test_results() as
        mmi_reverse_model_score_weight
    :param temperatures: optional values, each passed to _save_test_results() as temperature
    :param prediction_mode: prediction mode forwarded to _save_test_results()
    """
    if not model_path:
        model_path = get_model_full_path()
    if not tokens_index_path:
        tokens_index_path = get_index_to_token_path(BASE_CORPUS_NAME)
    if not conditions_index_path:
        conditions_index_path = get_index_to_condition_path(BASE_CORPUS_NAME)
    if not default_predictions_path:
        default_predictions_path = os.path.join(DATA_DIR, 'results', 'predictions_' + get_model_full_params_str())

    # Construct list of parameters values for all possible combinations of passed parameters
    prediction_params = [dict()]
    if reverse_model_weights:
        prediction_params = [
            dict(params, mmi_reverse_model_score_weight=w)
            for params in prediction_params
            for w in reverse_model_weights
        ]
    if temperatures:
        prediction_params = [dict(params, temperature=t) for params in prediction_params for t in temperatures]

    # Get path for each combination of parameters
    predictions_paths = []
    # A suffix with the parameter values is added only when there is more than one combination;
    # a single combination (including "no parameters passed") is written to the plain file name
    if len(prediction_params) > 1:
        for cur_params in prediction_params:
            cur_path = '{base_path}_{params_str}.tsv'.format(
                base_path=default_predictions_path,
                params_str='_'.join(['{}_{}'.format(k, v) for k, v in cur_params.items()]))
            predictions_paths.append(cur_path)
    else:
        predictions_paths = [default_predictions_path + '.tsv']

    # Logger.warn is a deprecated alias, so Logger.warning is used instead
    if not is_non_empty_file(model_path):
        _logger.warning('Couldn\'t find model:\n"{}". \nExiting...'.format(model_path))
        return

    if not is_non_empty_file(tokens_index_path):
        _logger.warning('Couldn\'t find tokens_index file:\n"{}". \nExiting...'.format(tokens_index_path))
        return

    # NOTE(review): conditions_index_path is loaded below without a similar existence check - confirm
    # whether load_index_to_item() is expected to handle a missing file.
    _logger.info('Model for prediction:\n{}'.format(model_path))
    _logger.info('Tokens index:\n{}'.format(tokens_index_path))
    _logger.info('File with questions:\n{}'.format(QUESTIONS_CORPUS_NAME))
    _logger.info('Files to dump responses:\n{}'.format('\n'.join(predictions_paths)))
    _logger.info('Prediction parameters\n{}'.format('\n'.join([str(x) for x in prediction_params])))

    index_to_token = load_index_to_item(tokens_index_path)
    index_to_condition = load_index_to_item(conditions_index_path)

    # Materialize the test set once: it is reused for every parameters combination
    processed_test_set = get_tokenized_test_lines(QUESTIONS_CORPUS_NAME, set(index_to_token.values()))
    processed_test_set = list(processed_test_set)

    nn_model, _ = get_nn_model(index_to_token, index_to_condition, nn_model_path=model_path)

    for cur_params, cur_path in zip(prediction_params, predictions_paths):
        _logger.info('Predicting with the following params: {}'.format(cur_params))
        _save_test_results(processed_test_set, cur_path, nn_model, prediction_mode, **cur_params)
def predict(model_path=None,
            tokens_index_path=None,
            conditions_index_path=None,
            default_predictions_path=None,
            reverse_model_weights=None,
            temperatures=None,
            prediction_mode=PREDICTION_MODE_FOR_TESTS):
    """
    Generate responses for the questions corpus and dump them to .tsv files.

    One file is written for each combination of the passed reverse_model_weights and temperatures.
    Logs a warning and returns None without predicting when is_non_empty_file() rejects the model
    file or the tokens index file.

    :param model_path: path to the saved model; get_model_full_path() is used when empty
    :param tokens_index_path: path to the tokens index; the BASE_CORPUS_NAME default is used when empty
    :param conditions_index_path: path to the conditions index; the BASE_CORPUS_NAME default is used when empty
    :param default_predictions_path: base path (without extension) of the output files; defaults to
        DATA_DIR/results/predictions_<model params string>
    :param reverse_model_weights: optional values, each passed to _save_test_results() as
        mmi_reverse_model_score_weight
    :param temperatures: optional values, each passed to _save_test_results() as temperature
    :param prediction_mode: prediction mode forwarded to _save_test_results()
    """
    if not model_path:
        model_path = get_model_full_path()
    if not tokens_index_path:
        tokens_index_path = get_index_to_token_path(BASE_CORPUS_NAME)
    if not conditions_index_path:
        conditions_index_path = get_index_to_condition_path(BASE_CORPUS_NAME)
    if not default_predictions_path:
        default_predictions_path = os.path.join(DATA_DIR, 'results', 'predictions_' + get_model_full_params_str())

    # Construct list of parameters values for all possible combinations of passed parameters
    prediction_params = [dict()]
    if reverse_model_weights:
        prediction_params = [
            dict(params, mmi_reverse_model_score_weight=w)
            for params in prediction_params
            for w in reverse_model_weights
        ]
    if temperatures:
        prediction_params = [dict(params, temperature=t) for params in prediction_params for t in temperatures]

    # Get path for each combination of parameters
    predictions_paths = []
    # A suffix with the parameter values is added only when there is more than one combination;
    # a single combination (including "no parameters passed") is written to the plain file name
    if len(prediction_params) > 1:
        for cur_params in prediction_params:
            cur_path = '{base_path}_{params_str}.tsv'.format(
                base_path=default_predictions_path,
                params_str='_'.join(['{}_{}'.format(k, v) for k, v in cur_params.items()]))
            predictions_paths.append(cur_path)
    else:
        predictions_paths = [default_predictions_path + '.tsv']

    # Logger.warn is a deprecated alias, so Logger.warning is used instead
    if not is_non_empty_file(model_path):
        _logger.warning('Couldn\'t find model:\n"{}". \nExiting...'.format(model_path))
        return

    if not is_non_empty_file(tokens_index_path):
        _logger.warning('Couldn\'t find tokens_index file:\n"{}". \nExiting...'.format(tokens_index_path))
        return

    # NOTE(review): conditions_index_path is loaded below without a similar existence check - confirm
    # whether load_index_to_item() is expected to handle a missing file.
    _logger.info('Model for prediction:\n{}'.format(model_path))
    _logger.info('Tokens index:\n{}'.format(tokens_index_path))
    _logger.info('File with questions:\n{}'.format(QUESTIONS_CORPUS_NAME))
    _logger.info('Files to dump responses:\n{}'.format('\n'.join(predictions_paths)))
    _logger.info('Prediction parameters\n{}'.format('\n'.join([str(x) for x in prediction_params])))

    index_to_token = load_index_to_item(tokens_index_path)
    index_to_condition = load_index_to_item(conditions_index_path)

    # Materialize the test set once: it is reused for every parameters combination
    processed_test_set = get_tokenized_test_lines(QUESTIONS_CORPUS_NAME, set(index_to_token.values()))
    processed_test_set = list(processed_test_set)

    nn_model, _ = get_nn_model(index_to_token, index_to_condition, nn_model_path=model_path)

    for cur_params, cur_path in zip(prediction_params, predictions_paths):
        _logger.info('Predicting with the following params: {}'.format(cur_params))
        _save_test_results(processed_test_set, cur_path, nn_model, prediction_mode, **cur_params)
def train_model(nn_model, is_reverse_model=False):
    """
    Main function for training. Refactoring anticipated.

    Loads the train and validation sets, then runs EPOCHES_NUM epochs of batch training on nn_model.
    During training it logs per-batch progress, periodically logs sample answers, and periodically
    calculates metrics, saves validation results and updates the saved model.
    On KeyboardInterrupt or SystemExit the model and the validation results are saved with
    a '_final' suffix.

    :param nn_model: model to train; its token_to_index and condition_to_index attributes are read
        and its train() method is called for every batch
    :param is_reverse_model: when True, all datasets are passed through reverse_nn_input() and sampling
        is used as the validation prediction mode; the flag is also forwarded to the functions that
        log, save and locate the model
    """
    validation_prediction_mode = PREDICTION_MODES.sampling if is_reverse_model else PREDICTION_MODE_FOR_TESTS

    # NOTE: the local name `train` holds the training dataset inside this function.
    train = load_conditioned_train_set(nn_model.token_to_index, nn_model.condition_to_index)
    context_free_val = load_context_free_val(nn_model.token_to_index)
    context_sensitive_val = load_context_sensitive_val(nn_model.token_to_index, nn_model.condition_to_index)
    if is_reverse_model:
        service_tokens = ServiceTokensIDs(nn_model.token_to_index)
        train = reverse_nn_input(train, service_tokens)
        context_free_val = reverse_nn_input(context_free_val, service_tokens)
        context_sensitive_val = reverse_nn_input(context_sensitive_val, service_tokens)

    # Train subset of same size as a context-free val for metrics calculation
    train_subset = generate_subset(train, VAL_SUBSET_SIZE)

    # Context-sensitive val subset of same size as a context-free val for metrics calculation
    context_sensitive_val_subset = generate_subset(context_sensitive_val, VAL_SUBSET_SIZE)

    _logger.info('Finished preprocessing! Start training')

    # batch_id counts batches across all epochs; it is never reset.
    batch_id = 0
    avg_loss = 0
    total_training_time = 0
    best_val_perplexities = (float('inf'), float('inf'))
    # Batches per epoch, rounded up.
    # NOTE(review): this relies on `/` being integer division (Python 2 ints) - confirm.
    batches_num = (train.x.shape[0] - 1) / BATCH_SIZE + 1

    start_time = time.time()
    # Last validation metrics computed; stays None until the first validation pass completes.
    cur_val_metrics = None

    try:
        for epoches_counter in xrange(1, EPOCHES_NUM + 1):
            _logger.info('Starting epoch #%d; time = %0.2f s(training of it = %0.2f s)' %
                         (epoches_counter, time.time() - start_time, total_training_time))

            for train_batch in get_training_batch(
                    [train.x, train.y, train.condition_ids], BATCH_SIZE, random_permute=SHUFFLE_TRAINING_BATCHES):
                x_train_batch, y_train_batch, condition_ids_train_batch = train_batch

                batch_id += 1
                # Only the nn_model.train() call is counted as training time.
                prev_time = time.time()
                loss = nn_model.train(x_train_batch, y_train_batch, condition_ids_train_batch)

                cur_time = time.time()
                total_training_time += cur_time - prev_time
                total_time = cur_time - start_time
                # Decayed running average of the loss; the very first batch initializes it.
                avg_loss = LOG_LOSS_DECAY * avg_loss + (1 - LOG_LOSS_DECAY) * loss if batch_id > 1 else loss

                # batch_id is cumulative, so progress is measured against a single epoch's batch count.
                progress = 100 * float(batch_id) / batches_num
                # Despite the name, this is the average wall-clock time per batch.
                avr_time_per_sample = total_time / batch_id
                expected_time_per_epoch = avr_time_per_sample * batches_num

                # Per-batch progress line; times are converted from seconds to hours.
                _logger.info('batch %s / %s (%d%%) \t'
                             'loss: %.2f \t '
                             'time: epoch %.1f h | '
                             'total %0.1f h | '
                             'train %0.1f h (%.1f%%)' %
                             (batch_id, batches_num, progress, avg_loss, expected_time_per_epoch / 3600,
                              total_time / 3600, total_training_time / 3600,
                              100 * total_training_time / total_time))

                if batch_id % SCREEN_LOG_FREQUENCY_PER_BATCHES == 0:
                    _log_sample_answers(context_free_val.x[:SCREEN_LOG_NUM_TEST_LINES], nn_model,
                                        validation_prediction_mode, is_reverse_model)

                if batch_id % LOG_FREQUENCY_PER_BATCHES == 0:
                    _calc_and_save_train_metrics(nn_model, train_subset, avg_loss)

                    val_metrics = _calc_and_save_val_metrics(
                        nn_model,
                        context_sensitive_val_subset,
                        context_free_val,
                        prediction_mode=validation_prediction_mode)
                    _save_val_results(
                        nn_model,
                        context_free_val.x,
                        context_sensitive_val_subset.x,
                        val_metrics,
                        train_info=(start_time, batch_id, batches_num),
                        prediction_mode=validation_prediction_mode)
                    cur_val_metrics = val_metrics

                    # Perplexities are passed in (context-free, context-sensitive) order.
                    best_val_perplexities = \
                        _update_saved_nn_model(nn_model, (val_metrics['context_free_perplexity'],
                                                          val_metrics['context_sensitive_perplexity']),
                                               best_val_perplexities, is_reverse_model=is_reverse_model)
    except (KeyboardInterrupt, SystemExit):
        _logger.info('Training cycle is stopped manually')
        _save_model(nn_model, get_model_full_path(is_reverse_model) + '_final')
        # cur_val_metrics is still None here if training was stopped before the first validation pass.
        _save_val_results(
            nn_model,
            context_free_val.x,
            context_sensitive_val_subset.x,
            cur_val_metrics,
            train_info=(start_time, batch_id, batches_num),
            suffix='_final',
            prediction_mode=validation_prediction_mode)