def _query_dataset_ids(self, solr_base_url, core, query, timestamp_query):
    '''Method to query for dataset ids within a given datetime interval.'''

    datasets = {}
    url = solr_base_url + "/" + core + "/select"

    # send request
    params = {
        "q": query,
        "fq": timestamp_query,
        "wt": "json",
        "indent": "true",
        "start": "0",
        "rows": "%s" % MAX_DATASETS_PER_HOUR,
        "fl": ["id", "_timestamp"],
    }
    response = http_get_json(url, params)

    if int(response['response']['numFound']) > 0:
        for doc in response['response']['docs']:
            datasets[doc['id']] = doc['_timestamp']

    return datasets

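# NOTE: the methods in this section rely on a module-level helper
# http_get_json() and on the constants MAX_DATASETS_PER_HOUR and CORES,
# none of which are defined here. A minimal sketch of what such a helper
# might look like, assuming only the standard library (the real
# implementation may differ):
#
#     import json
#     import urllib.parse
#     import urllib.request
#
#     def http_get_json(url, params):
#         # doseq=True expands list values such as "fl": ["id", "_timestamp"]
#         # into repeated query parameters, as Solr expects.
#         query_string = urllib.parse.urlencode(params, doseq=True)
#         with urllib.request.urlopen(url + "?" + query_string) as response:
#             return json.loads(response.read().decode("utf-8"))
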
def _commit_solr(self, solr_base_url):
    '''Commits all pending updates on every Solr core.'''

    for core in CORES:
        solr_url = solr_base_url + "/" + core + "/update"
        params = {"commit": "true", "wt": "json"}
        logging.info("Committing the Solr index: %s" % solr_url)
        response = http_get_json(solr_url, params)
        logging.debug(response)

def _check_record(self, solr_base_url, core, record_id):
    '''Checks for the existence of a record with a given id.'''

    solr_url = solr_base_url + "/" + core + "/select"
    response = http_get_json(solr_url, {
        'q': 'id:%s' % record_id,
        'wt': 'json',
    })
    return int(response["response"]['numFound']) > 0

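# Hypothetical guard built on _check_record(): skip records that are
# already indexed ("harvester" and "index_record" below are assumptions
# for illustration only):
#
#     if not harvester._check_record(solr_base_url, core, record_id):
#         index_record(record_id)
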
def query(self, solr_core, query, start, rows, fq):
    '''Method to execute a generic Solr query, returning all fields.'''

    url = self._solr_base_url + "/" + solr_core + "/select"

    # send request
    params = {
        "q": query,
        "fq": fq,
        "wt": "json",
        "indent": "true",
        "start": "%s" % start,
        "rows": "%s" % rows,
    }
    jdoc = http_get_json(url, params)
    return jdoc['response']

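# Hypothetical pagination loop built on query(): Solr reports "numFound"
# with every page, so the full result set can be walked in fixed-size
# batches ("client" and "process" are assumptions for illustration):
#
#     start, rows = 0, 100
#     while True:
#         response = client.query("datasets", "*:*", start, rows, "*:*")
#         for doc in response["docs"]:
#             process(doc)
#         start += rows
#         if start >= int(response["numFound"]):
#             break
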
def _query_solr_stats(self, solr_base_url, core, query, fq):
    '''
    Method to query the Solr stats.
    Note: solrpy cannot be used because it does not support 'stats'.
    '''

    url = solr_base_url + "/" + core + "/select"

    # send request
    params = {
        "q": query,
        "fq": fq,
        "wt": "json",
        "indent": "true",
        "stats": "true",
        "stats.field": "_timestamp",
        "rows": "0",
    }

    # default values if the HTTP response cannot be retrieved
    (counts, timestamp_min, timestamp_max, timestamp_mean) = (
        -1, None, None, None)

    try:
        response = http_get_json(url, params)
        # logging.debug("Solr Response: %s" % response)

        # parse response
        counts = response['response']['numFound']
        timestamp_stats = response.get('stats', {}).get(
            'stats_fields', {}).get('_timestamp', {})
        timestamp_min = timestamp_stats.get('min')
        timestamp_max = timestamp_stats.get('max')
        timestamp_mean = timestamp_stats.get('mean')

        # convert strings into datetime objects;
        # ignore microseconds for comparison
        if timestamp_min is not None:
            timestamp_min = dateutil.parser.parse(timestamp_min).replace(
                microsecond=0)
        if timestamp_max is not None:
            timestamp_max = dateutil.parser.parse(timestamp_max).replace(
                microsecond=0)
        if timestamp_mean is not None:
            timestamp_mean = dateutil.parser.parse(timestamp_mean).replace(
                microsecond=0)

    except urllib.error.URLError as e:
        logging.warning(e)

    # return output
    return [counts, timestamp_min, timestamp_max, timestamp_mean]

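# For reference, the shape of the Solr StatsComponent payload that
# _query_solr_stats() parses (illustrative values only; the exact layout
# depends on the Solr version):
#
#     {
#         "response": {"numFound": 1234, "docs": []},
#         "stats": {
#             "stats_fields": {
#                 "_timestamp": {
#                     "min": "2024-01-01T00:00:00Z",
#                     "max": "2024-01-31T23:59:59Z",
#                     "mean": "2024-01-16T12:00:00Z"
#                 }
#             }
#         }
#     }
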
def optimize(self, solr_core):
    '''Optimizes (merges the index segments of) the given Solr core.'''

    url = "%s/%s/update" % (self._solr_base_url, solr_core)
    return http_get_json(url, {'optimize': 'true'})

def commit(self, solr_core):
    '''Commits all pending updates on the given Solr core.'''

    url = "%s/%s/update" % (self._solr_base_url, solr_core)
    return http_get_json(url, {'commit': 'true'})
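
# Hypothetical maintenance pass combining commit() and optimize(): flush
# pending updates on every core, then merge index segments ("client" is an
# assumption for illustration):
#
#     for core in CORES:
#         client.commit(core)
#         client.optimize(core)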