def _try_deleting_ranker(self, ranker_id):
    """Best-effort cleanup of a test ranker.

    Checks whether `ranker_id` still exists in the RnR service and, if so,
    deletes it. Logs (rather than raises) when the ranker is already gone.

    :param str ranker_id: id of the ranker to delete
    :raises requests.HTTPError: if the DELETE call fails
    """
    config = load_config()
    url, user_id, password = get_rnr_credentials(config)
    # Build the URL with string formatting, consistent with
    # _delete_test_cluster: os.path.join would join with backslashes on
    # Windows and silently produce an invalid URL.
    ranker_url = '%s/v1/rankers/%s' % (url, ranker_id)
    headers = {'x-global-transaction-id': 'Rishavs app',
               'Content-type': 'application/json'}
    response = requests.get(ranker_url, auth=(user_id, password), headers=headers)
    response_text = json.dumps(response.json(), indent=4, sort_keys=True)
    if response.status_code == 200:
        self.logger.info('Found a test ranker that needs cleanup: %s' % response_text)
        response = requests.delete(ranker_url, auth=(user_id, password), headers=headers)
        response.raise_for_status()
        self.logger.info("Successfully deleted test ranker: %s" % ranker_id)
    else:
        self.logger.info(
            'No cleanup required for ranker id: %s (got response: %s)' % (ranker_id, response_text))
def __init__(self, config=None, logger=None):
    """Initialize the Bluemix RnR connection.

    :param ConfigParser config: for access to credentials; loaded from disk
        when omitted
    :param logging.Logger logger: for logging; a default logger is created
        when omitted
    """
    # None sentinels instead of call-time defaults: load_config() and
    # initialize_logger() used to run once at import time and be shared
    # across all instances.
    if logger is None:
        logger = initialize_logger(logging.INFO, 'BluemixServiceProxy')
    if config is None:
        config = load_config()
    self.logger = logger
    self.config = config
    self.bluemix_url, self.bluemix_user, self.bluemix_password = get_rnr_credentials(config)
    self.bluemix_connection = RetrieveAndRankV1(url=self.bluemix_url,
                                                username=self.bluemix_user,
                                                password=self.bluemix_password)
def get_discovery_credentials(config=None):
    """Read Watson Discovery credentials from config.

    :param ConfigParser config: config holding a `Discovery` section; loaded
        from disk when omitted (None sentinel avoids the call-time-default
        pitfall of running load_config() at import time)
    :return: kwargs suitable for constructing a DiscoveryV1 client
    :rtype: dict
    """
    if config is None:
        config = load_config()
    url = config.get('Discovery', 'url', fallback=discovery_v1.default_url)
    user = config.get('Discovery', 'user')
    password = config.get('Discovery', 'password')
    version = config.get('Discovery', 'version', fallback=discovery_v1.latest_version)
    return {'url': url, 'version': version, 'username': user, 'password': password}
def __init__(self, config=None, logger=None):
    """Initialize the connection to Bluemix.

    :param ConfigParser config: an initialized config with user, password and
        environment id; loaded from disk when omitted
    :param logging.Logger logger: for logging; a default logger is created
        when omitted
    """
    # None sentinels instead of call-time defaults (which ran once at import
    # time and were shared across instances).
    if config is None:
        config = load_config()
    if logger is None:
        logger = initialize_logger(logging.INFO, 'DiscoveryProxy')
    self.config = config
    self.logger = logger
    self.discovery = initialize_discovery_service(config)
    # Bug fix: `fallback=search_for_byod_environment_id(...)` was an argument
    # expression, so Python evaluated it on EVERY construction even when the
    # config already contained an environment_id. Only search when missing.
    environment_id = config.get('Discovery', 'environment_id', fallback=None)
    if environment_id is None:
        environment_id = search_for_byod_environment_id(self.discovery)
    self.environment_id = environment_id
def _delete_test_cluster(self, cluster_id):
    """Best-effort cleanup of a Solr test cluster.

    Looks up `cluster_id` in the RnR service; deletes it when present,
    otherwise just logs that no cleanup was needed.

    :param str cluster_id: id of the Solr cluster to delete
    :raises requests.HTTPError: if the DELETE call fails
    """
    self.logger.info("Attempting to clean up the test cluster that was spun up for the unit test: %s"
                     % cluster_id)
    credentials = load_config()
    base_url, username, secret = get_rnr_credentials(credentials)
    cluster_url = '%s/v1/solr_clusters/%s' % (base_url, cluster_id)
    request_headers = {'x-global-transaction-id': 'Rishavs app',
                       'Content-type': 'application/json'}
    lookup = requests.get(cluster_url, auth=(username, secret), headers=request_headers)
    pretty_body = json.dumps(lookup.json(), indent=4, sort_keys=True)
    if lookup.status_code != 200:
        # Nothing to delete (cluster already gone or never created).
        self.logger.info('No cleanup required for cluster id: %s (got response: %s)'
                         % (cluster_id, pretty_body))
        return
    self.logger.info('Found a test cluster that needs cleanup: %s' % pretty_body)
    deletion = requests.delete(cluster_url, auth=(username, secret), headers=request_headers)
    deletion.raise_for_status()
    self.logger.info("Successfully deleted test cluster: %s" % cluster_id)
import logging
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from os import path

from rnr_debug_helpers.utils.rnr_wrappers import RetrieveAndRankProxy
from rnr_debug_helpers.utils.io_helpers import initialize_logger, load_config

LOGGER = initialize_logger(logging.INFO, path.basename(__file__))
CONFIG = load_config()
# TODO: make the cluster size an optional parameter
_CLUSTER_SIZE = 2


def main(args):
    """Spin up (or connect to) a Solr cluster and initialize a collection.

    :param argparse.Namespace args: parsed CLI args with cluster_id,
        config_id, config_path and collection_name attributes
    """
    LOGGER.info('Start Script')
    solr_cluster = RetrieveAndRankProxy(solr_cluster_id=args.cluster_id,
                                        cluster_name='TestCluster',
                                        cluster_size=_CLUSTER_SIZE)
    LOGGER.info('Initialized bluemix connection to solr cluster: %s' % solr_cluster.solr_cluster_id)
    solr_cluster.setup_cluster_and_collection(config_id=args.config_id,
                                              config_zip=args.config_path,
                                              collection_id=args.collection_name)
    doc_count = solr_cluster.get_num_docs_in_collection(args.collection_name)
    LOGGER.info('Initialized a document collection: %s (there are %d docs in the collection)'
                % (args.collection_name, doc_count))
def generate_rnr_features(in_query_stream, outfile, collection_id, cluster_id, num_rows=30, config=None):
    """
    Iterates over a labelled query stream and generates a feature file with the columns:
        <query_num>,<answer_id>,<fea_0>,<fea_1>,...,<fea_n>,<relevance_label>

    :param rnr_debug_helpers.queries.LabelledQueryStream in_query_stream:
    :param File outfile: where the feature file contents will be written to
    :param str collection_id: the RnR solr collection to use for finding search results
    :param str cluster_id: the RnR solr cluster id to use for finding search results
    :param int or None num_rows: The number of search results that will be retrieved for each
        query. Defaults to 30 similar to RnR Web UI/Tooling
    :param ConfigParser config: A config loaded with the credentials to use; loaded from disk
        when omitted
    :return: aggregated counters collected while processing the queries
    :rtype: defaultdict
    """
    # None sentinel instead of a call-time default: load_config() used to run
    # at import time and be shared across all calls.
    if config is None:
        config = load_config()
    rnr_cluster = RetrieveAndRankProxy(solr_cluster_id=cluster_id, config=config)
    writer = csv.writer(outfile)

    # Iterate over queries and generate feature vectors
    stats = defaultdict(int)
    is_first_row = True
    for qid, query in enumerate(in_query_stream):
        labels_for_relevant_answer_ids = _parse_correct_answer_ids_from_query(query)
        _collect_stats(stats, labels_for_relevant_answer_ids)
        LOGGER.debug("Getting feature vectors for query:<<%s>>" % query.get_qid())
        # NOTE(review): query.get_qid() is passed as query_text here — looks
        # like this should be the query text rather than its id; confirm
        # against LabelledQueryStream before changing.
        rnr_search_results = rnr_cluster.get_fcselect_features(query_text=query.get_qid(),
                                                               collection_id=collection_id,
                                                               generate_header=is_first_row,
                                                               num_results_to_return=num_rows)
        if len(rnr_search_results) == 0:
            stats["num_queries_with_zero_rnr_results"] += 1
        else:
            if is_first_row:
                # First result row is the feature-name header returned by RnR.
                writer.writerow([_QID_COLUMN_NAME] + rnr_search_results.pop(0) + [_GT_COLUMN_NAME])
                is_first_row = False
            stats["num_queries_with_atleast_one_search_result"] += 1
            stats['num_search_results_retrieved'] += len(rnr_search_results)
            num_possible_correct, num_correct_answers_in_search_results = \
                _print_feature_vectors_and_check_for_correct_answers(writer, rnr_search_results,
                                                                     '%d' % (qid + 1),
                                                                     labels_for_relevant_answer_ids)
            if num_possible_correct != num_correct_answers_in_search_results:
                stats['num_queries_where_at_least_correct_answer_didnt_appear_in_rnr'] += 1
            stats["num_correct_in_search_result"] += num_correct_answers_in_search_results
        # NOTE(review): stats["num_queries"] is never incremented in this
        # loop — presumably _collect_stats does it; verify, otherwise this
        # progress log fires on every iteration with a count of 0.
        if stats["num_queries"] % 100 == 0:
            LOGGER.info("Processed %d queries from input file" % stats['num_queries'])
    _average_stats_across_collection(stats)
    LOGGER.info("Finished processing %d queries from input file" % stats['num_queries'])
    return stats
def initialize_discovery_service(config=None):
    """Construct a DiscoveryV1 client from credentials in `config`.

    :param ConfigParser config: credentials source; loaded from disk when
        omitted (None sentinel avoids evaluating load_config() at import time)
    :return: an initialized Discovery client
    :rtype: discovery_v1.DiscoveryV1
    """
    if config is None:
        config = load_config()
    return discovery_v1.DiscoveryV1(**get_discovery_credentials(config))