Example #1
    def _try_deleting_ranker(self, ranker_id):
        """Check whether the ranker still exists and, if so, delete it (best-effort test cleanup)."""
        config = load_config()
        url, user_id, password = get_rnr_credentials(config)

        response = requests.get(path.join(url, 'v1/rankers', ranker_id),
                                auth=(user_id, password),
                                headers={
                                    'x-global-transaction-id': 'Rishavs app',
                                    'Content-type': 'application/json'
                                })
        response_text = json.dumps(response.json(), indent=4, sort_keys=True)
        if response.status_code == 200:
            self.logger.info('Found a test ranker that needs cleanup: %s' %
                             response_text)
            response = requests.delete(path.join(url, 'v1/rankers', ranker_id),
                                       auth=(user_id, password),
                                       headers={
                                           'x-global-transaction-id':
                                           'Rishavs app',
                                           'Content-type': 'application/json'
                                       })
            response.raise_for_status()
            self.logger.info("Successfully deleted test ranker: %s" %
                             ranker_id)
        else:
            self.logger.info(
                'No cleanup required for ranker id: %s (got response: %s)' %
                (ranker_id, response_text))
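# The helper get_rnr_credentials() used above is not shown in these snippets; a
# minimal sketch, assuming the credentials live in a ConfigParser section (the
# section and option names below are guesses; only the returned
# (url, user_id, password) tuple shape is implied by the calling code):
def get_rnr_credentials(config):
    url = config.get('RetrieveAndRank', 'url')
    user_id = config.get('RetrieveAndRank', 'user')
    password = config.get('RetrieveAndRank', 'password')
    return url, user_id, password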
Example #2
    def __init__(self,
                 config=load_config(),
                 logger=initialize_logger(logging.INFO, 'BluemixServiceProxy')):
        """
        Initialize the connection to the Retrieve and Rank service on Bluemix

        :param ConfigParser config: for access to credentials
        :param logging.Logger logger: for logging
        """
        self.logger = logger
        self.config = config
        self.bluemix_url, self.bluemix_user, self.bluemix_password = get_rnr_credentials(config)
        self.bluemix_connection = RetrieveAndRankV1(url=self.bluemix_url, username=self.bluemix_user,
                                                    password=self.bluemix_password)
def get_discovery_credentials(config=load_config()):
    """Read the Discovery credentials from the config and return them as keyword arguments for DiscoveryV1."""
    url = config.get('Discovery', 'url', fallback=discovery_v1.default_url)
    user = config.get('Discovery', 'user')
    password = config.get('Discovery', 'password')
    version = config.get('Discovery',
                         'version',
                         fallback=discovery_v1.latest_version)
    return {
        'url': url,
        'version': version,
        'username': user,
        'password': password
    }
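# For illustration, a hypothetical credentials file consistent with the keys
# read above (load_config() is assumed to return a ConfigParser over something
# like this; the file name and actual values are not shown in these snippets):
#
#   [Discovery]
#   url = <discovery-service-url>
#   user = <service-username>
#   password = <service-password>
#   version = <api-version-date>
#   environment_id = <optional; discovered at runtime when omitted>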
    def __init__(self,
                 config=load_config(),
                 logger=initialize_logger(logging.INFO, 'DiscoveryProxy')):
        """
        Initialize the connection to Bluemix

        :param CofigParser config: An initialized config with user, password and environment id
        """
        self.config = config
        self.logger = logger
        self.discovery = initialize_discovery_service(config)
        self.environment_id = config.get(
            'Discovery',
            'environment_id',
            fallback=search_for_byod_environment_id(self.discovery))
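# Minimal usage sketch (the file name is illustrative, and the class name
# DiscoveryProxy is inferred from its logger name): passing an explicit
# ConfigParser avoids relying on the defaults above, which are evaluated only
# once, at import time, because load_config() appears in the argument list.
from configparser import ConfigParser

my_config = ConfigParser()
my_config.read('discovery_credentials.ini')
proxy = DiscoveryProxy(config=my_config)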
    def _delete_test_cluster(self, cluster_id):
        self.logger.info("Attempting to clean up the test cluster that was spun up for the unit test: %s" % cluster_id)
        config = load_config()
        url, user_id, password = get_rnr_credentials(config)

        response = requests.get('%s/v1/solr_clusters/%s' % (url, cluster_id),
                                auth=(user_id, password),
                                headers={'x-global-transaction-id': 'Rishavs app',
                                         'Content-type': 'application/json'})
        response_text = json.dumps(response.json(), indent=4, sort_keys=True)
        if response.status_code == 200:
            self.logger.info('Found a test cluster that needs cleanup: %s' % response_text)
            response = requests.delete('%s/v1/solr_clusters/%s' % (url, cluster_id),
                                       auth=(user_id, password),
                                       headers={'x-global-transaction-id': 'Rishavs app',
                                                'Content-type': 'application/json'})
            response.raise_for_status()
            self.logger.info("Successfully deleted test cluster: %s" % cluster_id)
        else:
            self.logger.info('No cleanup required for cluster id: %s (got response: %s)' % (cluster_id, response_text))
import logging
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

from os import path

from rnr_debug_helpers.utils.rnr_wrappers import RetrieveAndRankProxy
from rnr_debug_helpers.utils.io_helpers import initialize_logger, load_config

LOGGER = initialize_logger(logging.INFO, path.basename(__file__))
CONFIG = load_config()
# TODO: make the cluster size an optional parameter
_CLUSTER_SIZE = 2


def main(args):
    LOGGER.info('Start Script')
    solr_cluster = RetrieveAndRankProxy(solr_cluster_id=args.cluster_id,
                                        cluster_name='TestCluster',
                                        cluster_size=_CLUSTER_SIZE)
    LOGGER.info('Initialized bluemix connection to solr cluster: %s' %
                solr_cluster.solr_cluster_id)

    solr_cluster.setup_cluster_and_collection(
        config_id=args.config_id,
        config_zip=args.config_path,
        collection_id=args.collection_name)

    LOGGER.info(
        'Initialized a document collection: %s (there are %d docs in the collection)'
        % (args.collection_name,
           solr_cluster.get_num_docs_in_collection(args.collection_name)))
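# The command-line parsing for this script is not shown above; a minimal entry
# point consistent with the attributes main() reads from `args` (the flag names
# and help strings are assumptions) might look like this:
if __name__ == '__main__':
    parser = ArgumentParser(description='Set up an RnR Solr cluster and document collection',
                            formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('-i', '--cluster-id', dest='cluster_id', default=None,
                        help='Re-use an existing Solr cluster id instead of spinning up a new one')
    parser.add_argument('-c', '--config-id', dest='config_id', default=None,
                        help='Name of a Solr configuration that has already been uploaded to the cluster')
    parser.add_argument('-z', '--config-path', dest='config_path', default=None,
                        help='Path to a zipped Solr configuration to upload when --config-id is not given')
    parser.add_argument('-n', '--collection-name', dest='collection_name', required=True,
                        help='Name of the Solr collection to create or re-use')
    main(parser.parse_args())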
def generate_rnr_features(in_query_stream, outfile, collection_id, cluster_id, num_rows=30, config=load_config()):
    """
    Iterates over a labelled query stream and generates a feature file with the columns:
        <query_num>,<answer_id>,<fea_0>,<fea_1>,...,<fea_n>,<relevance_label>
    :param rnr_debug_helpers.queries.LabelledQueryStream in_query_stream:
    :param File outfile: where the feature file contents will be written to
    :param str collection_id: the RnR solr collection to use for finding search results
    :param str cluster_id: the RnR solr cluster id to use for finding search results
    :param int or None num_rows: The number of search results that will be retrieved for each query. Defaults to 30
        similar to RnR Web UI/Tooling
    :param ConfigParser config: A config loaded with the credentials to use
    """

    rnr_cluster = RetrieveAndRankProxy(solr_cluster_id=cluster_id, config=config)
    writer = csv.writer(outfile)
    # Iterate over queries and generate feature vectors
    stats = defaultdict(int)
    is_first_row = True

    for qid, query in enumerate(in_query_stream):
        labels_for_relevant_answer_ids = _parse_correct_answer_ids_from_query(query)
        _collect_stats(stats, labels_for_relevant_answer_ids)

        LOGGER.debug("Getting feature vectors for query:<<%s>>" % query.get_qid())
        rnr_search_results = rnr_cluster.get_fcselect_features(query_text=query.get_qid(), collection_id=collection_id,
                                                               generate_header=is_first_row,
                                                               num_results_to_return=num_rows)
        if len(rnr_search_results) == 0:
            stats["num_queries_with_zero_rnr_results"] += 1
        else:
            if is_first_row:
                writer.writerow([_QID_COLUMN_NAME] + rnr_search_results.pop(0) + [_GT_COLUMN_NAME])
                is_first_row = False

            stats["num_queries_with_atleast_one_search_result"] += 1
            stats['num_search_results_retrieved'] += len(rnr_search_results)
            num_possible_correct, num_correct_answers_in_search_results = \
                _print_feature_vectors_and_check_for_correct_answers(writer, rnr_search_results, '%d' % (qid + 1),
                                                                     labels_for_relevant_answer_ids)
            if num_possible_correct != num_correct_answers_in_search_results:
                stats['num_queries_where_at_least_correct_answer_didnt_appear_in_rnr'] += 1
            stats["num_correct_in_search_result"] += num_correct_answers_in_search_results

        if stats["num_queries"] % 100 == 0:
            LOGGER.info("Processed %d queries from input file" % stats['num_queries'])
    _average_stats_across_collection(stats)
    LOGGER.info("Finished processing %d queries from input file" % stats['num_queries'])
    return stats
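# Minimal usage sketch: how the labelled query stream is constructed depends on
# rnr_debug_helpers.queries and is not shown here, so `query_stream` below is
# assumed to be an already-initialized LabelledQueryStream; the ids and output
# file name are placeholders.
with open('rnr_features.csv', 'w') as feature_file:
    run_stats = generate_rnr_features(query_stream, feature_file,
                                      collection_id='my_collection',
                                      cluster_id='my_cluster_id',
                                      num_rows=30)
LOGGER.info('Feature generation stats: %s' % run_stats)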
def initialize_discovery_service(config=load_config()):
    return discovery_v1.DiscoveryV1(**get_discovery_credentials(config))
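# Minimal usage sketch: build the SDK client from the credentials in the config
# and locate the bring-your-own-data environment id, re-using the
# search_for_byod_environment_id helper that the DiscoveryProxy constructor
# above falls back to.
discovery = initialize_discovery_service()
environment_id = search_for_byod_environment_id(discovery)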