def setUp(self):
        """Wire up BDD-style aliases, a logger, and a scratch directory for each test."""
        # Aliases let tests read as fluent given/when/then chains; each one is
        # just another name for the test-case instance itself.
        for alias in ('given', 'then', 'when', '_and'):
            setattr(self, alias, self)

        self.logger = initialize_logger(logging.INFO, name=TestRetrieveAndRankProxy.__name__)
        # Track the cluster created during the test so tearDown can clean it up.
        self.new_cluster_id = None
        self.temp_dir = mkdtemp()
    def setUp(self):
        """Wire up BDD-style aliases, a logger, and a scratch directory for each test."""
        # Fluent given/when/then/_and/_with aliases — all point back at this
        # test-case instance so assertions chain readably.
        for alias in ('given', 'then', 'when', '_and', '_with'):
            setattr(self, alias, self)

        self.logger = initialize_logger(logging.INFO, name=TestGenerateRnrFeatures.__name__)
        self.temp_dir = mkdtemp()
        # Populated lazily by tests that spin up a cluster; tearDown checks it.
        self.bluemix_cluster = None
# Example #3
# 0
    def setUp(self):
        """Wire up BDD-style aliases, a logger, and a scratch directory for each test."""
        # given/then/when/_and are fluent aliases for the test-case instance.
        for alias in ('given', 'then', 'when', '_and'):
            setattr(self, alias, self)

        self.logger = initialize_logger(logging.INFO,
                                        name=TestRankerProxy.__name__)
        # Ranker created during a test goes here so tearDown can delete it.
        self.ranker_id = None
        self.temp_dir = mkdtemp()
# Example #4
# 0
    def __init__(self, config=None, logger=None):
        """
        Set up credentials and a connection to the Bluemix Retrieve and Rank service.

        :param ConfigParser config: for access to credentials; when omitted, a fresh
            config is loaded at call time
        :param logging.Logger logger: for logging; when omitted, a new INFO-level
            logger is created at call time
        """
        # BUGFIX: the defaults were previously `config=load_config()` and
        # `logger=initialize_logger(...)` in the signature. Call expressions in
        # defaults run once at class-definition (import) time, so every instance
        # silently shared one config/logger object and the I/O happened on import.
        # None-sentinel defaults defer that work to each call.
        self.logger = logger if logger is not None else initialize_logger(logging.INFO, 'BluemixServiceProxy')
        self.config = config if config is not None else load_config()
        self.bluemix_url, self.bluemix_user, self.bluemix_password = get_rnr_credentials(self.config)
        self.bluemix_connection = RetrieveAndRankV1(url=self.bluemix_url, username=self.bluemix_user,
                                                    password=self.bluemix_password)
    def setUp(self):
        """Wire up BDD-style aliases, a logger, and a scratch directory for each test."""
        # Fluent given/then/when/_and aliases all reference this instance.
        for alias in ('given', 'then', 'when', '_and'):
            setattr(self, alias, self)

        self.logger = initialize_logger(logging.INFO,
                                        name=TestDiscoveryProxy.__name__)

        # Discovery artifacts created during a test; tearDown uses these ids.
        self.collection_id = None
        self.config_id = None
        self.temp_dir = mkdtemp()
    def __init__(self,
                 config=None,
                 logger=None):
        """
        Initialize the connection to Bluemix Discovery.

        :param ConfigParser config: an initialized config with user, password and
            environment id; when omitted, a fresh config is loaded at call time
        :param logging.Logger logger: for logging; when omitted, a new INFO-level
            logger is created at call time
        """
        # BUGFIX: the defaults were previously `config=load_config()` and
        # `logger=initialize_logger(...)` in the signature. Call expressions in
        # defaults evaluate once at import time, sharing one object across all
        # instances and doing config I/O on import. Defer them to call time.
        self.config = config if config is not None else load_config()
        self.logger = logger if logger is not None else initialize_logger(logging.INFO, 'DiscoveryProxy')
        self.discovery = initialize_discovery_service(self.config)
        # Prefer an explicit environment id from config; otherwise search the
        # service for the bring-your-own-data environment.
        self.environment_id = self.config.get(
            'Discovery',
            'environment_id',
            fallback=search_for_byod_environment_id(self.discovery))
import argparse
import csv
import logging
from os import makedirs, path

from rnr_debug_helpers.utils.io_helpers import initialize_query_stream, smart_file_open, initialize_logger

TRAIN_RELEVANCE_FILENAME = "train.relevance_file.csv"
VALIDATION_RELEVANCE_FILENAME = 'validation.relevance_file.csv'
LOGGER = initialize_logger(logging.INFO, path.basename(__file__))


def k_fold_cross_validation(X, K, randomise=False):
    """
    Taken from http://code.activestate.com/recipes/521906-k-fold-cross-validation-partition/
    Generates K (training, validation) pairs from the items in X.

    Each pair is a partition of X, where validation is an iterable
    of length len(X)/K. So each training iterable is of length (K-1)*len(X)/K.

    If randomise is true, a copy of X is shuffled before partitioning,
    otherwise its order is preserved in training and validation.

    :param iterable X: set of data points to split into folds
    :param int K: number of folds to create splits for
    :param bool randomise: whether or not incoming list should be randomized
    :return: a train and validation split for each fold.  Implemented as generator, so each call yields a new
        split for a new fold up to k
    :rtype: tuple(list, list)
    """
    if randomise:
# Example #8
# 0
import argparse
import csv
import json
import logging
import sys
from collections import defaultdict, OrderedDict
from copy import deepcopy

from rnr_debug_helpers.utils.answer import Answer
from rnr_debug_helpers.utils.io_helpers import LabelledQuery, initialize_logger, smart_file_open, \
    PredictionReader, initialize_query_stream
from rnr_debug_helpers.utils.stats import compute_average_precision_for_query, compute_ndcg_for_query, \
    compute_top_1_accuracy, compute_recall_for_query

MAX_PRECISION_THRESHOLDS = None
LOGGER = initialize_logger(logging.INFO, "Computing Accuracy")
_DEFAULT_SCORE = -sys.maxsize


def _get_next_n_scores_from_prediction_reader(predictions, num_to_read):
    """
    helper script to read a specific number of predictions from the predictions file

    :param RaasPredictionReader predictions: reader for raas prediction file
    :param num_to_read: number of answers to read
    :return: list of rank scores and list of confidence scores (if any confidence scores were provided)
    :rtype: tuple(list, list)
    """
    rank_scores_for_query = []
    if predictions.is_configured_with_confidence_scores():
        conf_scores_for_query = []
# Example #9
# 0
import argparse
import csv
import logging
import sys
from collections import defaultdict
from os import path

from rnr_debug_helpers.compute_ranking_stats import generate_correct_answer_lookup, assign_labels_and_scores
from rnr_debug_helpers.utils.io_helpers import initialize_logger, smart_file_open, \
    PredictionReader, initialize_query_stream
from rnr_debug_helpers.utils.stats import compute_recall_for_query

LOGGER = initialize_logger(logging.INFO, path.basename(__file__))


def compute_recall_stats(k_settings_for_recall, labelled_query_stream,
                         prediction_reader):
    correct_answers_by_qid = generate_correct_answer_lookup(
        labelled_query_stream)

    LOGGER.info("scoring predictions from: %s (against labels from %s)" %
                (prediction_reader, labelled_query_stream))
    stats = defaultdict(int)
    stats['num_queries'] = len(correct_answers_by_qid)
    try:
        while True:
            labelled_answer_set = assign_labels_and_scores(
                prediction_reader.get_all_predictions_till_next_query(),
                correct_answers_by_qid)
            stats['num_queries_predicted'] += 1
            for k_for_recall in k_settings_for_recall: