def setUp(self):
    """Prepare each test: BDD-style fluent aliases, a logger, and a scratch dir."""
    # given/then/when/_and all alias the test case so assertions read fluently.
    self.given = self.then = self.when = self._and = self
    self.logger = initialize_logger(logging.INFO, name=TestRetrieveAndRankProxy.__name__)
    # Populated by tests that create a cluster; torn down later if set.
    self.new_cluster_id = None
    self.temp_dir = mkdtemp()
def setUp(self):
    """Prepare each test: BDD-style fluent aliases, a logger, and a scratch dir."""
    # given/then/when/_and/_with all alias the test case so assertions read fluently.
    self.given = self.then = self.when = self._and = self._with = self
    self.logger = initialize_logger(logging.INFO, name=TestGenerateRnrFeatures.__name__)
    self.temp_dir = mkdtemp()
    # Populated by tests that stand up a Bluemix cluster; torn down later if set.
    self.bluemix_cluster = None
def setUp(self):
    """Prepare each test: BDD-style fluent aliases, a logger, and a scratch dir."""
    # given/then/when/_and all alias the test case so assertions read fluently.
    self.given = self.then = self.when = self._and = self
    self.logger = initialize_logger(logging.INFO, name=TestRankerProxy.__name__)
    # Populated by tests that create a ranker; torn down later if set.
    self.ranker_id = None
    self.temp_dir = mkdtemp()
def __init__(self, config=None, logger=None):
    """
    Initialize the connection to the Bluemix Retrieve and Rank service.

    :param ConfigParser config: for access to credentials; loaded via
        ``load_config()`` when omitted
    :param logging.Logger logger: for logging; a default INFO-level logger
        is created when omitted
    """
    # Resolve defaults lazily. The original signature called load_config() and
    # initialize_logger() at function-definition time, which runs them once at
    # import and shares the result across every instance.
    self.logger = logger if logger is not None else initialize_logger(logging.INFO, 'BluemixServiceProxy')
    self.config = config if config is not None else load_config()
    self.bluemix_url, self.bluemix_user, self.bluemix_password = get_rnr_credentials(self.config)
    self.bluemix_connection = RetrieveAndRankV1(url=self.bluemix_url,
                                                username=self.bluemix_user,
                                                password=self.bluemix_password)
def setUp(self):
    """Prepare each test: BDD-style fluent aliases, a logger, and a scratch dir."""
    # given/then/when/_and all alias the test case so assertions read fluently.
    self.given = self.then = self.when = self._and = self
    self.logger = initialize_logger(logging.INFO, name=TestDiscoveryProxy.__name__)
    # Populated by tests that create Discovery resources; torn down later if set.
    self.collection_id = None
    self.config_id = None
    self.temp_dir = mkdtemp()
def __init__(self, config=None, logger=None):
    """
    Initialize the connection to Bluemix Discovery.

    :param ConfigParser config: an initialized config with user, password and
        environment id; loaded via ``load_config()`` when omitted
    :param logging.Logger logger: for logging; a default INFO-level logger
        is created when omitted
    """
    # Resolve defaults lazily. The original signature called load_config() and
    # initialize_logger() at function-definition time, which runs them once at
    # import and shares the result across every instance.
    self.config = config if config is not None else load_config()
    self.logger = logger if logger is not None else initialize_logger(logging.INFO, 'DiscoveryProxy')
    self.discovery = initialize_discovery_service(self.config)
    # Only fall back to searching the service when the config lacks the id:
    # passing fallback=search_for_byod_environment_id(...) would evaluate the
    # (service-querying) fallback eagerly even when the config value exists.
    if self.config.has_option('Discovery', 'environment_id'):
        self.environment_id = self.config.get('Discovery', 'environment_id')
    else:
        self.environment_id = search_for_byod_environment_id(self.discovery)
import argparse import csv import logging from os import makedirs, path from rnr_debug_helpers.utils.io_helpers import initialize_query_stream, smart_file_open, initialize_logger TRAIN_RELEVANCE_FILENAME = "train.relevance_file.csv" VALIDATION_RELEVANCE_FILENAME = 'validation.relevance_file.csv' LOGGER = initialize_logger(logging.INFO, path.basename(__file__)) def k_fold_cross_validation(X, K, randomise=False): """ Taken from http://code.activestate.com/recipes/521906-k-fold-cross-validation-partition/ Generates K (training, validation) pairs from the items in X. Each pair is a partition of X, where validation is an iterable of length len(X)/K. So each training iterable is of length (K-1)*len(X)/K. If randomise is true, a copy of X is shuffled before partitioning, otherwise its order is preserved in training and validation. :param iterable X: set of data points to split into folds :param int K: number of folds to create splits for :param bool randomise: whether or not incoming list should be randomized :return: a train and validation split for each fold. Implemented as generator, so each call yields a new split for a new fold up to k :rtype: tuple(list, list) """ if randomise:
import argparse import csv import json import logging import sys from collections import defaultdict, OrderedDict from copy import deepcopy from rnr_debug_helpers.utils.answer import Answer from rnr_debug_helpers.utils.io_helpers import LabelledQuery, initialize_logger, smart_file_open, \ PredictionReader, initialize_query_stream from rnr_debug_helpers.utils.stats import compute_average_precision_for_query, compute_ndcg_for_query, \ compute_top_1_accuracy, compute_recall_for_query MAX_PRECISION_THRESHOLDS = None LOGGER = initialize_logger(logging.INFO, "Computing Accuracy") _DEFAULT_SCORE = -sys.maxsize def _get_next_n_scores_from_prediction_reader(predictions, num_to_read): """ helper script to read a specific number of predictions from the predictions file :param RaasPredictionReader predictions: reader for raas prediction file :param num_to_read: number of answers to read :return: list of rank scores and list of confidence scores (if any confidence scores were provided) :rtype: tuple(list, list) """ rank_scores_for_query = [] if predictions.is_configured_with_confidence_scores(): conf_scores_for_query = []
import argparse import csv import logging import sys from collections import defaultdict from os import path from rnr_debug_helpers.compute_ranking_stats import generate_correct_answer_lookup, assign_labels_and_scores from rnr_debug_helpers.utils.io_helpers import initialize_logger, smart_file_open, \ PredictionReader, initialize_query_stream from rnr_debug_helpers.utils.stats import compute_recall_for_query LOGGER = initialize_logger(logging.INFO, path.basename(__file__)) def compute_recall_stats(k_settings_for_recall, labelled_query_stream, prediction_reader): correct_answers_by_qid = generate_correct_answer_lookup( labelled_query_stream) LOGGER.info("scoring predictions from: %s (against labels from %s)" % (prediction_reader, labelled_query_stream)) stats = defaultdict(int) stats['num_queries'] = len(correct_answers_by_qid) try: while True: labelled_answer_set = assign_labels_and_scores( prediction_reader.get_all_predictions_till_next_query(), correct_answers_by_qid) stats['num_queries_predicted'] += 1 for k_for_recall in k_settings_for_recall: