Example #1
0
    def _query_dataset_ids(self, solr_base_url, core, query, timestamp_query):
        '''
        Look up the ids of the datasets matching a query and timestamp filter.

        A select request is sent to the given core with "query" as the main
        query and "timestamp_query" as the filter query, asking only for the
        "id" and "_timestamp" fields and at most MAX_DATASETS_PER_HOUR rows.

        Returns a dictionary mapping each returned document id to its
        "_timestamp" value; the dictionary is empty when nothing was found.
        '''

        select_url = "/".join((solr_base_url, core, "select"))

        # request only the two fields needed to build the id -> timestamp map
        request_params = {
            "q": query,
            "fq": timestamp_query,
            "wt": "json",
            "indent": "true",
            "start": "0",
            "rows": "%s" % MAX_DATASETS_PER_HOUR,
            "fl": ["id", "_timestamp"],
        }

        result = http_get_json(select_url, request_params)['response']

        # nothing matched: do not look at the document list at all
        if int(result['numFound']) <= 0:
            return {}

        return {doc['id']: doc['_timestamp'] for doc in result['docs']}
Example #2
0
    def _commit_solr(self, solr_base_url):
        '''
        Send a commit request to the update handler of every core in CORES.

        For each core the target URL is logged at INFO level, the request
        is issued through http_get_json with commit=true, and the response
        is logged at DEBUG level. Nothing is returned.
        '''

        for core in CORES:

            solr_url = solr_base_url + "/" + core + "/update"
            params = {"commit": "true", "wt": "json"}
            # hand the URL to logging as an argument so the message is only
            # formatted when an INFO record is actually emitted
            logging.info("Committing the Solr index: %s", solr_url)
            response = http_get_json(solr_url, params)
            logging.debug(response)
Example #3
0
    def _check_record(self, solr_base_url, core, record_id):
        '''
        Tell whether the given core holds a record with the given id.

        Returns True when the "id:<record_id>" query reports a numFound
        greater than zero, False otherwise.
        '''

        select_url = "/".join((solr_base_url, core, "select"))
        lookup = {
            'q': 'id:%s' % record_id,
            'wt': 'json'
        }
        reply = http_get_json(select_url, lookup)

        # the comparison already yields the boolean to return
        return int(reply["response"]['numFound']) > 0
Example #4
0
    def query(self, solr_core, query, start, rows, fq):
        '''
        Run a generic select query against a core of the configured Solr.

        "query" is sent as "q", "fq" as the filter query, and "start" and
        "rows" as the paging window. No field list is specified in the
        request. Returns the 'response' entry of the JSON document.
        '''

        select_url = "/".join((self._solr_base_url, solr_core, "select"))

        # paging values are sent as strings
        request_params = {
            "q": query,
            "fq": fq,
            "wt": "json",
            "indent": "true",
            "start": "%s" % start,
            "rows": "%s" % rows,
        }

        return http_get_json(select_url, request_params)['response']
Example #5
0
    def _query_solr_stats(self, solr_base_url, core, query, fq):
        '''
        Query the Solr stats of the "_timestamp" field for a query/filter.
        Note: cannot use solrpy because it does not work with 'stats'.

        Returns the list [counts, timestamp_min, timestamp_max,
        timestamp_mean]. "counts" is the 'numFound' value of the response;
        each timestamp is a datetime with microseconds set to zero, or None
        when that statistic is missing or null. When the request raises
        urllib.error.URLError the error is logged as a warning and the
        values assigned so far (initially -1 and None) are returned.
        '''

        select_url = solr_base_url + "/" + core + "/select"

        # zero rows: only the hit count and the stats section are needed
        request_params = {
            "q": query,
            "fq": fq,
            "wt": "json",
            "indent": "true",
            "stats": "true",
            "stats.field": "_timestamp",
            "rows": "0",
        }

        stat_names = ('min', 'max', 'mean')

        # fallback values, kept if the HTTP response cannot be retrieved
        counts = -1
        timestamps = dict.fromkeys(stat_names)

        try:
            response = http_get_json(select_url, request_params)
            counts = response['response']['numFound']

            # first pass: pick out each statistic, a missing key gives None
            for name in stat_names:
                try:
                    timestamps[name] = (
                        response['stats']['stats_fields']['_timestamp'][name])
                except KeyError:
                    timestamps[name] = None

            # second pass: turn the strings into datetime objects,
            # ignoring microseconds for comparison
            for name in stat_names:
                raw_value = timestamps[name]
                if raw_value is not None:
                    timestamps[name] = dateutil.parser.parse(
                        raw_value).replace(microsecond=0)

        except urllib.error.URLError as e:
            logging.warning(e)

        return [counts] + [timestamps[name] for name in stat_names]
Example #6
0
    def optimize(self, solr_core):
        '''
        Send an optimize=true request to the update handler of a core.

        Returns whatever http_get_json returns for that request.
        '''

        location = (self._solr_base_url, solr_core)
        request_params = {'optimize': 'true'}
        return http_get_json("%s/%s/update" % location, request_params)
Example #7
0
 def commit(self, solr_core):
     '''
     Send a commit=true request to the update handler of a core.

     Returns whatever http_get_json returns for that request.
     '''
     location = (self._solr_base_url, solr_core)
     request_params = {'commit': 'true'}
     return http_get_json("%s/%s/update" % location, request_params)