Example 1
    def _build_query(self, query_dict, limit=None, offset=None, shards=None):
        if shards is not None:
            if self._available_shards is None:
                self._load_available_shards()

            shard_specs = []
            for shard in shards:
                if shard not in self._available_shards:
                    raise EsgfSearchException('Shard %s is not available' % shard)
                else:
                    for port, suffix in self._available_shards[shard]:
                        # suffix should be omitted when querying
                        shard_specs.append('%s:%s/solr' % (shard, port))

            shard_str = ','.join(shard_specs)
        else:
            shard_str = None

        full_query = MultiDict({
            'format': RESPONSE_FORMAT,
            'limit': limit,
            'distrib': 'true' if self.distrib else 'false',
            'offset': offset,
            'shards': shard_str,
        })
        full_query.extend(query_dict)

        # Remove all None valued items
        full_query = MultiDict(item for item in full_query.items() if item[1] is not None)

        return full_query
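A brief standalone sketch of the None-stripping step used above, assuming pyesgf.multidict.MultiDict behaves like the WebOb-style MultiDict exercised in the tests later in this listing (the 'application/solr+json' value stands in for RESPONSE_FORMAT and is only a placeholder):

from pyesgf.multidict import MultiDict

# Placeholder values standing in for RESPONSE_FORMAT and the caller's arguments.
query = MultiDict({'format': 'application/solr+json', 'limit': 10,
                   'distrib': 'true', 'offset': None, 'shards': None})
query.extend(MultiDict({'project': 'CMIP5'}))

# Remove all None valued items, exactly as in _build_query above.
query = MultiDict(item for item in query.items() if item[1] is not None)
print(sorted(query.items()))
# [('distrib', 'true'), ('format', 'application/solr+json'), ('limit', 10), ('project', 'CMIP5')]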
Example 2
    def send_query(self, query_dict, limit=None, offset=None):
        """
        Generally not to be called directly by the user but via SearchContext
	instances.
        
        :param query_dict: dictionary of query string parameers to send.
        :return: ElementTree instance (TODO: think about this)
        
        """
        
        full_query = MultiDict({
            'format': RESPONSE_FORMAT,
            'limit': limit,
            'distrib': 'true' if self.distrib else 'false',
            'offset': offset,
            'shards': ','.join(self.shards) if self.shards else None,
            })
        full_query.extend(query_dict)

        # Remove all None valued items
        full_query = MultiDict(item for item in full_query.items() if item[1] is not None)


        query_url = '%s?%s' % (self.url, urllib.urlencode(full_query))
        log.debug('Query request is %s' % query_url)

        response = urllib2.urlopen(query_url)
        ret = json.load(response)

        return ret
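The urllib.urlencode/urllib2.urlopen calls above are Python 2 APIs; a rough Python 3 sketch of the same request step (not part of the library, the function name is illustrative) could look like:

import json
import urllib.parse
import urllib.request

def send_query_py3(url, full_query):
    # full_query: an iterable of (key, value) pairs, e.g. full_query.items();
    # doseq=True expands repeated keys into repeated query parameters.
    query_url = '%s?%s' % (url, urllib.parse.urlencode(list(full_query), doseq=True))
    with urllib.request.urlopen(query_url) as response:
        return json.load(response)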
Example 3
def build_constraint_dict(constraints):
    c_dict = MultiDict()
    if constraints:
        for constraint in constraints.split(','):
            if ':' in constraint.strip():
                key, value = constraint.split(':', 1)
                c_dict.add(key, value)
    return c_dict
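A usage sketch for build_constraint_dict (expected output shown assuming the WebOb-style MultiDict semantics used elsewhere in this listing):

c_dict = build_constraint_dict('project:CORDEX,experiment:historical,experiment:rcp26')
print(c_dict.getall('experiment'))  # ['historical', 'rcp26']
print(c_dict.mixed())               # {'project': 'CORDEX', 'experiment': ['historical', 'rcp26']}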
Example 4
def build_constraint_dict(constraints):
    c_dict = MultiDict()
    if constraints:
        for constraint in constraints.split(','):
            if ':' in constraint.strip():
                key, value = constraint.split(':', 1)
                c_dict.add(key, value)
    return c_dict
Example 5
    def __init__(self,
                 connection,
                 constraints,
                 search_type=TYPE_DATASET,
                 latest=None,
                 facets=None,
                 fields=None,
                 from_timestamp=None,
                 to_timestamp=None,
                 replica=None):
        """
        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
	:param type: One of TYPE_* constants defining the document type to
	    search for
	:param facets: The list of facets for which counts will be retrieved
	    and constraints be validated against.  Or None to represent all
	    facets.
	:param fields: A list of field names to return in search responses
	:param replica: A boolean defining whether to return master records
	    or replicas, or None to return both.
	:param latest: A boolean defining whether to return only latest verisons
	    or only non-latest versions, or None to return both.

        """

        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None

        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = (None, None)
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s' %
                                      ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica
Example 6
def convert_constraints(url):
    """
    converts esgf search query to constraints parameter.
    TODO: constraints parameter should have the same structure as the esgf query.
    """
    # FROM: project=CMIP5&time_frequency=mon&variable=tas,tasmax,tasmin
    # TO: project:CORDEX,experiment:historical,experiment:rcp26
    parsed_url = urlparse(url)
    constraints = MultiDict()
    for qpart in parsed_url.query.split('&'):
        key, value = qpart.split('=')
        for val in value.split(','):
            constraints.add(key.strip(), val.strip())
    converted = ','.join(["{0[0]}:{0[1]}".format(c) for c in constraints.items()])
    return converted
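An input/output sketch matching the FROM/TO comment above (the host name and path are placeholders):

url = ('http://esgf.example.org/esg-search/search?'
       'project=CMIP5&time_frequency=mon&variable=tas,tasmax,tasmin')
print(convert_constraints(url))
# project:CMIP5,time_frequency:mon,variable:tas,variable:tasmax,variable:tasmin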
Example 7
def convert_constraints(url):
    """
    converts esgf search query to constraints parameter.
    TODO: constraints parameter should have the same structure as the esgf query.
    """
    # FROM: project=CMIP5&time_frequency=mon&variable=tas,tasmax,tasmin
    # TO: project:CORDEX,experiment:historical,experiment:rcp26
    parsed_url = urlparse(url)
    constraints = MultiDict()
    for qpart in parsed_url.query.split('&'):
        key, value = qpart.split('=')
        for val in value.split(','):
            constraints.add(key.strip(), val.strip())
    converted = ','.join(
        ["{0[0]}:{0[1]}".format(c) for c in constraints.iteritems()])
    return converted
Example 8
    def _build_query(self, query_dict, limit=None, offset=None, shards=None):
        if shards is not None:
            if self._available_shards is None:
                self._load_available_shards()

            shard_specs = []
            for shard in shards:
                if shard not in self._available_shards:
                    raise EsgfSearchException('Shard %s is not available' %
                                              shard)
                else:
                    for port, suffix in self._available_shards[shard]:
                        # suffix should be omitted when querying
                        shard_specs.append('%s:%s/solr' % (shard, port))

            shard_str = ','.join(shard_specs)
        else:
            shard_str = None

        full_query = MultiDict({
            'format': RESPONSE_FORMAT,
            'limit': limit,
            'distrib': 'true' if self.distrib else 'false',
            'offset': offset,
            'shards': shard_str,
        })
        full_query.extend(query_dict)

        # Remove all None valued items
        full_query = MultiDict(item for item in full_query.items()
                               if item[1] is not None)

        return full_query
Example 9
    def send_query(self, query_dict, limit=None, offset=None):
        """
        Generally not to be called directly by the user but via SearchContext
	instances.
        
        :param query_dict: dictionary of query string parameers to send.
        :return: ElementTree instance (TODO: think about this)
        
        """

        full_query = MultiDict({
            'format': RESPONSE_FORMAT,
            'limit': limit,
            'distrib': 'true' if self.distrib else 'false',
            'offset': offset,
            'shards': ','.join(self.shards) if self.shards else None,
        })
        full_query.extend(query_dict)

        # Remove all None valued items
        full_query = MultiDict(item for item in full_query.items()
                               if item[1] is not None)

        query_url = '%s?%s' % (self.url, urllib.urlencode(full_query))
        log.debug('Query request is %s' % query_url)

        response = urllib2.urlopen(query_url)
        ret = json.load(response)

        return ret
Example 10
    def __init__(self, connection, constraints, search_type=None,
                 latest=None, facets=None, fields=None,
                 from_timestamp=None, to_timestamp=None,
                 replica=None, shards=None):
        """

        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
        :param search_type: One of TYPE_* constants defining the document
            type to search for.  Overrides SearchContext.DEFAULT_SEARCH_TYPE
        :param facets: The list of facets for which counts will be retrieved
            and constraints be validated against.  Or None to represent all
            facets.
        :param fields: A list of field names to return in search responses
        :param replica: A boolean defining whether to return master records
            or replicas, or None to return both.
        :param latest: A boolean defining whether to return only latest versions
            or only non-latest versions, or None to return both.
        :param shards: list of shards to restrict searches to.  Should be from the list
            self.connection.get_shard_list()
        :param from_timestamp: Date-time string to specify start of search range 
            (e.g. "2000-01-01T00:00:00Z"). 
        :param to_timestamp: Date-time string to specify end of search range
            (e.g. "2100-12-31T23:59:59Z").

        """

        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None

        if search_type is None:
            search_type = self.DEFAULT_SEARCH_TYPE

        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = [from_timestamp, to_timestamp]
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s'
                                      % ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica
        self.shards = shards
Example 11
    def _build_query(self):
        """
        Build query string parameters as a dictionary.

        """

        query_dict = MultiDict({"query": self.freetext_constraint,
                      "type": self.search_type,
                      "latest": self.latest,
                      "facets": self.facets,
                      "fields": self.fields,
                      "replica": self.replica,
                      })

        query_dict.extend(self.facet_constraints)
        
        #!TODO: encode datetime
        #start, end = self.temporal_constraint
        #query_dict.update(start=start, end=end)

        return query_dict
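A minimal sketch of the extend step above, assuming pyesgf-style MultiDict semantics: facet constraints are appended, so a facet constrained to several values becomes a repeated query parameter.

from pyesgf.multidict import MultiDict

query_dict = MultiDict({'type': 'Dataset', 'latest': True})
facet_constraints = MultiDict([('experiment', 'historical'), ('experiment', 'rcp26')])
query_dict.extend(facet_constraints)
print(query_dict.getall('experiment'))  # ['historical', 'rcp26']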
Example 12
    def _build_query(self):
        """
        Build query string parameters as a dictionary.

        """

        query_dict = MultiDict({
            "query": self.freetext_constraint,
            "type": self.search_type,
            "latest": self.latest,
            "facets": self.facets,
            "fields": self.fields,
            "replica": self.replica,
        })

        query_dict.extend(self.facet_constraints)

        #!TODO: encode datetime
        #start, end = self.temporal_constraint
        #query_dict.update(start=start, end=end)

        return query_dict
Example 13
    def __init__(self, connection, constraints, search_type=TYPE_DATASET,
                 latest=None, facets=None, fields=None,
                 from_timestamp=None, to_timestamp=None,
                 replica=None):
        """
        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
	:param type: One of TYPE_* constants defining the document type to
	    search for
	:param facets: The list of facets for which counts will be retrieved
	    and constraints be validated against.  Or None to represent all
	    facets.
	:param fields: A list of field names to return in search responses
	:param replica: A boolean defining whether to return master records
	    or replicas, or None to return both.
	:param latest: A boolean defining whether to return only latest verisons
	    or only non-latest versions, or None to return both.

        """
        
        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None
        
        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = (None, None)
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s' 
                                      % ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica
Example 14
    def _split_constraints(self, constraints):
        """
        Divide a constraint dictionary into 4 types of constraints:
        1. Freetext query
        2. Facet constraints
        3. Temporal constraints
        4. Geospatial constraints

        :return: A dictionary of the 4 types of constraint.
        
        """
        # local import to prevent circular importing
        from .connection import query_keyword_type

        constraints_split = dict(
            (kw, MultiDict()) for kw in QUERY_KEYWORD_TYPES)
        for kw, val in constraints.items():
            constraint_type = query_keyword_type(kw)
            constraints_split[constraint_type][kw] = val

        return constraints_split
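The real keyword-to-type mapping lives in pyesgf.search.connection.query_keyword_type; the stand-in below is only an assumption to illustrate how the split buckets the constraints (the bucket names follow the usage of QUERY_KEYWORD_TYPES above and are not taken from the library source):

# Hypothetical stand-in, not the library implementation.
QUERY_KEYWORD_TYPES = ('system', 'freetext', 'facet', 'temporal', 'geospatial')

def query_keyword_type_sketch(keyword):
    if keyword == 'query':
        return 'freetext'
    if keyword in ('from_timestamp', 'to_timestamp', 'start', 'end'):
        return 'temporal'
    if keyword in ('bbox', 'lat', 'lon', 'radius', 'polygon'):
        return 'geospatial'
    return 'facet'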
Example 15
 def test_view_list_not_list(self):
     from pyesgf.multidict import MultiDict
     d = MultiDict()
     self.assertRaises(TypeError, d.view_list, 42)
Example 16
class SearchContext(object):
    """
    Instances of this class represent the state of a current search.
    It exposes what facets are available to select and the facet counts
    if they are available.
    
    Subclasses of this class can restrict the search options.  For instance
    FileSearchContext, DatasetSearchContext or CMIP5SearchContext
    
    SearchContext instances are connected to SearchConnection instances.  You
    normally create SearchContext instances via one of:
    1. Calling SearchConnection.new_context()
    2. Calling SearchContext.constrain()
    
    :ivar constraints: A dictionary of facet constraints currently in effect.
        constraint[facet_name] = [value, value, ...]
        
    """

    def __init__(self, connection, constraints, search_type=TYPE_DATASET,
                 latest=None, facets=None, fields=None,
                 from_timestamp=None, to_timestamp=None,
                 replica=None):
        """
        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
	:param type: One of TYPE_* constants defining the document type to
	    search for
	:param facets: The list of facets for which counts will be retrieved
	    and constraints be validated against.  Or None to represent all
	    facets.
	:param fields: A list of field names to return in search responses
	:param replica: A boolean defining whether to return master records
	    or replicas, or None to return both.
	:param latest: A boolean defining whether to return only latest verisons
	    or only non-latest versions, or None to return both.

        """
        
        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None
        
        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = (None, None)
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s' 
                                      % ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica

    #-------------------------------------------------------------------------
    # Functional search interface
    # These do not change the constraints on self.

    def search(self, **constraints):
        """
        :param constraints: Further constraints for this query.  Equivalent
            to calling self.constrain(**constraints).search()
        :return: A ResultSet for this query

        """
        if constraints:
            sc = self.constrain(**constraints)
        else:
            sc = self

        self.__update_counts()
        
        return ResultSet(sc)

    def constrain(self, **constraints):
        """
        Return a *new* instance with the additional constraints.
        
        """
        new_sc = copy.copy(self)
        new_sc._update_constraints(constraints)
        return new_sc


    @property
    def facet_counts(self):
        self.__update_counts()
        return self.__facet_counts

    @property
    def hit_count(self):
        self.__update_counts()
        return self.__hit_count

    def get_facet_options(self):
        """
        Return a dictionary of facet counts filtered to remove all facets that
        are completely constrained.
        
        """
        facet_options = {}
        hits = self.hit_count
        for facet, counts in self.facet_counts.items():
            # filter out counts that match total hits
            counts = dict(items for items in counts.items()
                          if items[1] < hits)
            if len(counts) > 1:
                facet_options[facet] = counts

        return facet_options

    def __update_counts(self):
        # If hit_count is set the counts are already retrieved
        if self.__hit_count is not None:
            return
        
        self.__facet_counts = {}
        self.__hit_count = None
        query_dict = self._build_query()
        query_dict['facets'] = '*'

        response = self.connection.send_query(query_dict, limit=0)
        for facet, counts in (
                response['facet_counts']['facet_fields'].items()):
            d = self.__facet_counts[facet] = {}
            while counts:
                d[counts.pop()] = counts.pop()

        self.__hit_count = response['response']['numFound']
                
    #-------------------------------------------------------------------------
    # Constraint mutation interface
    # These functions update the instance in-place.
    # Use constrain() and search() to generate new contexts with tighter
    # constraints.

    def _update_constraints(self, constraints):
        """
        Update the constraints in-place by calling _constrain_*() methods.
        
        """
        constraints_split = self._split_constraints(constraints)
        self._constrain_facets(constraints_split['facet'])
        if 'query' in constraints_split['freetext']:
            new_freetext = constraints_split['freetext']['query']
            self._constrain_freetext(new_freetext)

        #!TODO: implement temporal and geospatial constraints
        #self._constrain_temporal()
        #self._constrain_geospatial()

        # reset cached values
        self.__hit_count = None
        self.__facet_counts = None

    def _constrain_facets(self, facet_constraints):
        for key, values in facet_constraints.mixed().items():
            current_values = self.facet_constraints.getall(key)
            if isinstance(values, list):
                for value in values:
                    if value not in current_values:
                        self.facet_constraints.add(key, value)
            else:
                if values not in current_values:
                    self.facet_constraints.add(key, values)
        
    
    def _constrain_freetext(self, query):
        self.freetext_constraint = query

    def _constrain_temporal(self, start, end):
        """
        :param start: a datetime instance specifying the start of the temporal
            constraint.
        :param end: a datetime instance specifying the end of the temporal
            constraint.

        """
        #!TODO: support solr date keywords like "NOW" and "NOW-1DAY"
        #     we will probably need a separate TemporalConstraint object
        self.temporal_constraint = (start, end)

    def _constrain_geospatial(self, lat=None, lon=None, bbox=None, location=None,
                              radius=None, polygon=None):
        self.geospatial_constraint = GeospatialConstraint(lat, lon, bbox, location,
                                                          radius, polygon)

        raise NotImplementedError
        
    #-------------------------------------------------------------------------

    def _split_constraints(self, constraints):
        """
        Divide a constraint dictionary into 4 types of constraints:
        1. Freetext query
        2. Facet constraints
        3. Temporal constraints
        4. Geospatial constraints

        :return: A dictionary of the 4 types of constraint.
        
        """
        # local import to prevent circular importing
        from .connection import query_keyword_type

        constraints_split = dict((kw, MultiDict()) for kw in QUERY_KEYWORD_TYPES)
        for kw, val in constraints.items():
            constraint_type = query_keyword_type(kw)
            constraints_split[constraint_type][kw] = val

        return constraints_split
        
    def _build_query(self):
        """
        Build query string parameters as a dictionary.

        """

        query_dict = MultiDict({"query": self.freetext_constraint,
                      "type": self.search_type,
                      "latest": self.latest,
                      "facets": self.facets,
                      "fields": self.fields,
                      "replica": self.replica,
                      })

        query_dict.extend(self.facet_constraints)
        
        #!TODO: encode datetime
        #start, end = self.temporal_constraint
        #query_dict.update(start=start, end=end)

        return query_dict
Example 17
class SearchContext(object):
    """
    Instances of this class represent the state of a current search.
    It exposes what facets are available to select and the facet counts
    if they are available.
    
    Subclasses of this class can restrict the search options.  For instance
    FileSearchContext, DatasetSearchContext or CMIP5SearchContext
    
    SearchContext instances are connected to SearchConnection instances.  You
    normally create SearchContext instances via one of:
    1. Calling SearchConnection.new_context()
    2. Calling SearchContext.constrain()
    
    :ivar constraints: A dictionary of facet constraints currently in effect.
        constraint[facet_name] = [value, value, ...]
        
    """
    def __init__(self,
                 connection,
                 constraints,
                 search_type=TYPE_DATASET,
                 latest=None,
                 facets=None,
                 fields=None,
                 from_timestamp=None,
                 to_timestamp=None,
                 replica=None):
        """
        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
	:param type: One of TYPE_* constants defining the document type to
	    search for
	:param facets: The list of facets for which counts will be retrieved
	    and constraints be validated against.  Or None to represent all
	    facets.
	:param fields: A list of field names to return in search responses
	:param replica: A boolean defining whether to return master records
	    or replicas, or None to return both.
	:param latest: A boolean defining whether to return only latest verisons
	    or only non-latest versions, or None to return both.

        """

        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None

        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = (None, None)
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s' %
                                      ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica

    #-------------------------------------------------------------------------
    # Functional search interface
    # These do not change the constraints on self.

    def search(self, **constraints):
        """
        :param constraints: Further constraints for this query.  Equivalent
            to calling self.constrain(**constraints).search()
        :return: A ResultSet for this query

        """
        if constraints:
            sc = self.constrain(**constraints)
        else:
            sc = self

        self.__update_counts()

        return ResultSet(sc)

    def constrain(self, **constraints):
        """
        Return a *new* instance with the additional constraints.
        
        """
        new_sc = copy.copy(self)
        new_sc._update_constraints(constraints)
        return new_sc

    @property
    def facet_counts(self):
        self.__update_counts()
        return self.__facet_counts

    @property
    def hit_count(self):
        self.__update_counts()
        return self.__hit_count

    def get_facet_options(self):
        """
        Return a dictionary of facet counts filtered to remove all facets that
        are completely constrained.
        
        """
        facet_options = {}
        hits = self.hit_count
        for facet, counts in self.facet_counts.items():
            # filter out counts that match total hits
            counts = dict(items for items in counts.items() if items[1] < hits)
            if len(counts) > 1:
                facet_options[facet] = counts

        return facet_options

    def __update_counts(self):
        # If hit_count is set the counts are already retrieved
        if self.__hit_count is not None:
            return

        self.__facet_counts = {}
        self.__hit_count = None
        query_dict = self._build_query()
        query_dict['facets'] = '*'

        response = self.connection.send_query(query_dict, limit=0)
        for facet, counts in (
                response['facet_counts']['facet_fields'].items()):
            d = self.__facet_counts[facet] = {}
            while counts:
                d[counts.pop()] = counts.pop()

        self.__hit_count = response['response']['numFound']

    #-------------------------------------------------------------------------
    # Constraint mutation interface
    # These functions update the instance in-place.
    # Use constrain() and search() to generate new contexts with tighter
    # constraints.

    def _update_constraints(self, constraints):
        """
        Update the constraints in-place by calling _constrain_*() methods.
        
        """
        constraints_split = self._split_constraints(constraints)
        self._constrain_facets(constraints_split['facet'])
        if 'query' in constraints_split['freetext']:
            new_freetext = constraints_split['freetext']['query']
            self._constrain_freetext(new_freetext)

        #!TODO: implement temporal and geospatial constraints
        #self._constrain_temporal()
        #self._constrain_geospatial()

        # reset cached values
        self.__hit_count = None
        self.__facet_counts = None

    def _constrain_facets(self, facet_constraints):
        for key, values in facet_constraints.mixed().items():
            current_values = self.facet_constraints.getall(key)
            if isinstance(values, list):
                for value in values:
                    if value not in current_values:
                        self.facet_constraints.add(key, value)
            else:
                if values not in current_values:
                    self.facet_constraints.add(key, values)

    def _constrain_freetext(self, query):
        self.freetext_constraint = query

    def _constrain_temporal(self, start, end):
        """
        :param start: a datetime instance specifying the start of the temporal
            constraint.
        :param end: a datetime instance specifying the end of the temporal
            constraint.

        """
        #!TODO: support solr date keywords like "NOW" and "NOW-1DAY"
        #     we will probably need a separate TemporalConstraint object
        self.temporal_constraint = (start, end)

    def _constrain_geospatial(self,
                              lat=None,
                              lon=None,
                              bbox=None,
                              location=None,
                              radius=None,
                              polygon=None):
        self.geospatial_constraint = GeospatialConstraint(
            lat, lon, bbox, location, radius, polygon)

        raise NotImplementedError

    #-------------------------------------------------------------------------

    def _split_constraints(self, constraints):
        """
        Divide a constraint dictionary into 4 types of constraints:
        1. Freetext query
        2. Facet constraints
        3. Temporal constraints
        4. Geospatial constraints

        :return: A dictionary of the 4 types of constraint.
        
        """
        # local import to prevent circular importing
        from .connection import query_keyword_type

        constraints_split = dict(
            (kw, MultiDict()) for kw in QUERY_KEYWORD_TYPES)
        for kw, val in constraints.items():
            constraint_type = query_keyword_type(kw)
            constraints_split[constraint_type][kw] = val

        return constraints_split

    def _build_query(self):
        """
        Build query string parameters as a dictionary.

        """

        query_dict = MultiDict({
            "query": self.freetext_constraint,
            "type": self.search_type,
            "latest": self.latest,
            "facets": self.facets,
            "fields": self.fields,
            "replica": self.replica,
        })

        query_dict.extend(self.facet_constraints)

        #!TODO: encode datetime
        #start, end = self.temporal_constraint
        #query_dict.update(start=start, end=end)

        return query_dict
Example 18
class SearchContext(object):
    """
    Instances of this class represent the state of a current search.
    It exposes what facets are available to select and the facet counts
    if they are available.
    
    Subclasses of this class can restrict the search options.  For instance
    FileSearchContext, DatasetSearchContext or CMIP5SearchContext
    
    SearchContext instances are connected to SearchConnection instances.  You
    normally create SearchContext instances via one of:
    1. Calling SearchConnection.new_context()
    2. Calling SearchContext.constrain()
    
    :ivar constraints: A dictionary of facet constraints currently in effect.
        ``constraint[facet_name] = [value, value, ...]``
    :property facet_counts: A dictionary of available hits with each 
        facet value for the search as currently constrained.
        This property returns a dictionary of dictionaries where 
        ``facet_counts[facet][facet_value] == hit_count``
    :property hit_count: The total number of hits available with current constraints.

        
    """

    DEFAULT_SEARCH_TYPE = NotImplemented

    def __init__(self, connection, constraints, search_type=None,
                 latest=None, facets=None, fields=None,
                 from_timestamp=None, to_timestamp=None,
                 replica=None, shards=None):
        """

        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
        :param search_type: One of TYPE_* constants defining the document
            type to search for.  Overrides SearchContext.DEFAULT_SEARCH_TYPE
        :param facets: The list of facets for which counts will be retrieved
            and constraints be validated against.  Or None to represent all
            facets.
        :param fields: A list of field names to return in search responses
        :param replica: A boolean defining whether to return master records
            or replicas, or None to return both.
        :param latest: A boolean defining whether to return only latest versions
            or only non-latest versions, or None to return both.
        :param shards: list of shards to restrict searches to.  Should be from the list
            self.connection.get_shard_list()
        :param from_timestamp: Date-time string to specify start of search range 
            (e.g. "2000-01-01T00:00:00Z"). 
        :param to_timestamp: Date-time string to specify end of search range
            (e.g. "2100-12-31T23:59:59Z").

        """

        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None

        if search_type is None:
            search_type = self.DEFAULT_SEARCH_TYPE

        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = [from_timestamp, to_timestamp]
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s'
                                      % ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica
        self.shards = shards

    #-------------------------------------------------------------------------
    # Functional search interface
    # These do not change the constraints on self.

    def search(self, **constraints):
        """
        Perform the search with current constraints returning a set of results.

        :param constraints: Further constraints for this query.  Equivalent
            to calling self.constrain(**constraints).search()
        :return: A ResultSet for this query

        """
        if constraints:
            sc = self.constrain(**constraints)
        else:
            sc = self

        self.__update_counts()

        return ResultSet(sc)

    def constrain(self, **constraints):
        """
        Return a *new* instance with the additional constraints.
        
        """
        new_sc = copy.deepcopy(self)
        new_sc._update_constraints(constraints)
        return new_sc

    def get_download_script(self, **constraints):
        """
        Return a script for downloading all files in the set of results.

        :param constraints: Further constraints for this query.  Equivalent
            to calling self.constrain(**constraints).get_download_script()
        :return: A string containing the script

        """
        if constraints:
            sc = self.constrain(**constraints)
        else:
            sc = self

        sc.__update_counts()

        query_dict = sc._build_query()

        #!TODO: allow setting limit
        script = sc.connection.send_wget(query_dict,
                                         shards=self.shards)

        return script

    @property
    def facet_counts(self):
        self.__update_counts()
        return self.__facet_counts

    @property
    def hit_count(self):
        self.__update_counts()
        return self.__hit_count

    def get_facet_options(self):
        """
        Return a dictionary of facet counts filtered to remove all
        facets that are completely constrained.  This method is
        similar to the property ``facet_counts`` except facet values
        which are not relevant for further constraining are removed.
        
        """
        facet_options = {}
        hits = self.hit_count
        for facet, counts in self.facet_counts.items():
            # filter out counts that match total hits
            counts = dict(items for items in counts.items()
                          if items[1] < hits)
            if len(counts) > 1:
                facet_options[facet] = counts

        return facet_options

    def __update_counts(self):
        # If hit_count is set the counts are already retrieved
        if self.__hit_count is not None:
            return

        self.__facet_counts = {}
        self.__hit_count = None
        query_dict = self._build_query()
        query_dict['facets'] = '*'

        response = self.connection.send_search(query_dict, limit=0)
        for facet, counts in (
            response['facet_counts']['facet_fields'].items()):
            d = self.__facet_counts[facet] = {}
            while counts:
                d[counts.pop()] = counts.pop()

        self.__hit_count = response['response']['numFound']

    #-------------------------------------------------------------------------
    # Constraint mutation interface
    # These functions update the instance in-place.
    # Use constrain() and search() to generate new contexts with tighter
    # constraints.

    def _update_constraints(self, constraints):
        """
        Update the constraints in-place by calling _constrain_*() methods.
        
        """
        constraints_split = self._split_constraints(constraints)
        self._constrain_facets(constraints_split['facet'])
        if 'query' in constraints_split['freetext']:
            new_freetext = constraints_split['freetext']['query']
            self._constrain_freetext(new_freetext)

        #!TODO: implement temporal and geospatial constraints
        if 'from_timestamp' in constraints_split['temporal']:
            self.temporal_constraint[0] = constraints_split['temporal']['from_timestamp']
        if 'to_timestamp' in constraints_split['temporal']:
            self.temporal_constraint[1] = constraints_split['temporal']['to_timestamp']
        #self._constrain_geospatial()

        # reset cached values
        self.__hit_count = None
        self.__facet_counts = None

    def _constrain_facets(self, facet_constraints):
        for key, values in facet_constraints.mixed().items():
            current_values = self.facet_constraints.getall(key)
            if isinstance(values, list):
                for value in values:
                    if value not in current_values:
                        self.facet_constraints.add(key, value)
            else:
                if values not in current_values:
                    self.facet_constraints.add(key, values)

    def _constrain_freetext(self, query):
        self.freetext_constraint = query

    def _constrain_geospatial(self, lat=None, lon=None, bbox=None, location=None,
                              radius=None, polygon=None):
        self.geospatial_constraint = GeospatialConstraint(lat, lon, bbox, location, radius, polygon)

        raise NotImplementedError

    #-------------------------------------------------------------------------

    def _split_constraints(self, constraints):
        """
        Divide a constraint dictionary into 4 types of constraints:
        1. Freetext query
        2. Facet constraints
        3. Temporal constraints
        4. Geospatial constraints

        :return: A dictionary of the 4 types of constraint.
        
        """
        # local import to prevent circular importing
        from .connection import query_keyword_type

        constraints_split = dict((kw, MultiDict()) for kw in QUERY_KEYWORD_TYPES)

        for kw, val in constraints.items():
            constraint_type = query_keyword_type(kw)
            constraints_split[constraint_type][kw] = val

        return constraints_split

    def _build_query(self):
        """
        Build query string parameters as a dictionary.

        """

        query_dict = MultiDict({"query": self.freetext_constraint,
                                "type": self.search_type,
                                "latest": self.latest,
                                "facets": self.facets,
                                "fields": self.fields,
                                "replica": self.replica,
        })

        query_dict.extend(self.facet_constraints)

        #!TODO: encode datetime
        start, end = self.temporal_constraint
        query_dict.update(start=start, end=end)

        return query_dict
Example 19
class SearchContext(object):
    """
    Instances of this class represent the state of a current search.
    It exposes what facets are available to select and the facet counts
    if they are available.
    
    Subclasses of this class can restrict the search options.  For instance
    FileSearchContext, DatasetSearchContext or CMIP5SearchContext
    
    SearchContext instances are connected to SearchConnection instances.  You
    normally create SearchContext instances via one of:
    1. Calling SearchConnection.new_context()
    2. Calling SearchContext.constrain()
    
    :ivar constraints: A dictionary of facet constraints currently in effect.
        ``constraint[facet_name] = [value, value, ...]``
    :property facet_counts: A dictionary of available hits with each 
        facet value for the search as currently constrained.
        This property returns a dictionary of dictionaries where 
        ``facet_counts[facet][facet_value] == hit_count``
    :property hit_count: The total number of hits available with current constraints.

        
    """

    DEFAULT_SEARCH_TYPE = NotImplemented

    def __init__(self,
                 connection,
                 constraints,
                 search_type=None,
                 latest=None,
                 facets=None,
                 fields=None,
                 from_timestamp=None,
                 to_timestamp=None,
                 replica=None,
                 shards=None):
        """

        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
        :param search_type: One of TYPE_* constants defining the document
            type to search for.  Overrides SearchContext.DEFAULT_SEARCH_TYPE
        :param facets: The list of facets for which counts will be retrieved
            and constraints be validated against.  Or None to represent all
            facets.
        :param fields: A list of field names to return in search responses
        :param replica: A boolean defining whether to return master records
            or replicas, or None to return both.
        :param latest: A boolean defining whether to return only latest versions
            or only non-latest versions, or None to return both.
        :param shards: list of shards to restrict searches to.  Should be from the list
            self.connection.get_shard_list()
        :param from_timestamp: Date-time string to specify start of search range 
            (e.g. "2000-01-01T00:00:00Z"). 
        :param to_timestamp: Date-time string to specify end of search range
            (e.g. "2100-12-31T23:59:59Z").

        """

        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None

        if search_type is None:
            search_type = self.DEFAULT_SEARCH_TYPE

        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = [from_timestamp, to_timestamp]
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s' %
                                      ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica
        self.shards = shards

    #-------------------------------------------------------------------------
    # Functional search interface
    # These do not change the constraints on self.

    def search(self, **constraints):
        """
        Perform the search with current constraints returning a set of results.

        :param constraints: Further constraints for this query.  Equivalent
            to calling self.constrain(**constraints).search()
        :return: A ResultSet for this query

        """
        if constraints:
            sc = self.constrain(**constraints)
        else:
            sc = self

        self.__update_counts()

        return ResultSet(sc)

    def constrain(self, **constraints):
        """
        Return a *new* instance with the additional constraints.
        
        """
        new_sc = copy.deepcopy(self)
        new_sc._update_constraints(constraints)
        return new_sc

    def get_download_script(self, **constraints):
        """
        Return a script for downloading all files in the set of results.

        :param constraints: Further constraints for this query.  Equivalent
            to calling self.constrain(**constraints).get_download_script()
        :return: A string containing the script

        """
        if constraints:
            sc = self.constrain(**constraints)
        else:
            sc = self

        sc.__update_counts()

        query_dict = sc._build_query()

        #!TODO: allow setting limit
        script = sc.connection.send_wget(query_dict, shards=self.shards)

        return script

    @property
    def facet_counts(self):
        self.__update_counts()
        return self.__facet_counts

    @property
    def hit_count(self):
        self.__update_counts()
        return self.__hit_count

    def get_facet_options(self):
        """
        Return a dictionary of facet counts filtered to remove all
        facets that are completely constrained.  This method is
        similar to the property ``facet_counts`` except facet values
        which are not relevant for further constraining are removed.
        
        """
        facet_options = {}
        hits = self.hit_count
        for facet, counts in self.facet_counts.items():
            # filter out counts that match total hits
            counts = dict(items for items in counts.items() if items[1] < hits)
            if len(counts) > 1:
                facet_options[facet] = counts

        return facet_options

    def __update_counts(self):
        # If hit_count is set the counts are already retrieved
        if self.__hit_count is not None:
            return

        self.__facet_counts = {}
        self.__hit_count = None
        query_dict = self._build_query()
        query_dict['facets'] = '*'

        response = self.connection.send_search(query_dict, limit=0)
        for facet, counts in (
                response['facet_counts']['facet_fields'].items()):
            d = self.__facet_counts[facet] = {}
            while counts:
                d[counts.pop()] = counts.pop()

        self.__hit_count = response['response']['numFound']

    #-------------------------------------------------------------------------
    # Constraint mutation interface
    # These functions update the instance in-place.
    # Use constrain() and search() to generate new contexts with tighter
    # constraints.

    def _update_constraints(self, constraints):
        """
        Update the constraints in-place by calling _constrain_*() methods.
        
        """
        constraints_split = self._split_constraints(constraints)
        self._constrain_facets(constraints_split['facet'])
        if 'query' in constraints_split['freetext']:
            new_freetext = constraints_split['freetext']['query']
            self._constrain_freetext(new_freetext)

        #!TODO: implement temporal and geospatial constraints
        if 'from_timestamp' in constraints_split['temporal']:
            self.temporal_constraint[0] = constraints_split['temporal'][
                'from_timestamp']
        if 'to_timestamp' in constraints_split['temporal']:
            self.temporal_constraint[1] = constraints_split['temporal'][
                'to_timestamp']
        #self._constrain_geospatial()

        # reset cached values
        self.__hit_count = None
        self.__facet_counts = None

    def _constrain_facets(self, facet_constraints):
        for key, values in facet_constraints.mixed().items():
            current_values = self.facet_constraints.getall(key)
            if isinstance(values, list):
                for value in values:
                    if value not in current_values:
                        self.facet_constraints.add(key, value)
            else:
                if values not in current_values:
                    self.facet_constraints.add(key, values)

    def _constrain_freetext(self, query):
        self.freetext_constraint = query

    def _constrain_geospatial(self,
                              lat=None,
                              lon=None,
                              bbox=None,
                              location=None,
                              radius=None,
                              polygon=None):
        self.geospatial_constraint = GeospatialConstraint(
            lat, lon, bbox, location, radius, polygon)

        raise NotImplementedError

    #-------------------------------------------------------------------------

    def _split_constraints(self, constraints):
        """
        Divide a constraint dictionary into 4 types of constraints:
        1. Freetext query
        2. Facet constraints
        3. Temporal constraints
        4. Geospatial constraints

        :return: A dictionary of the 4 types of constraint.
        
        """
        # local import to prevent circular importing
        from .connection import query_keyword_type

        constraints_split = dict(
            (kw, MultiDict()) for kw in QUERY_KEYWORD_TYPES)

        for kw, val in constraints.items():
            constraint_type = query_keyword_type(kw)
            constraints_split[constraint_type][kw] = val

        return constraints_split

    def _build_query(self):
        """
        Build query string parameters as a dictionary.

        """

        query_dict = MultiDict({
            "query": self.freetext_constraint,
            "type": self.search_type,
            "latest": self.latest,
            "facets": self.facets,
            "fields": self.fields,
            "replica": self.replica,
        })

        query_dict.extend(self.facet_constraints)

        #!TODO: encode datetime
        start, end = self.temporal_constraint
        query_dict.update(start=start, end=end)

        return query_dict
Example 20
 def test_from_fieldstorage_without_filename(self):
     from pyesgf.multidict import MultiDict
     d = MultiDict()
     fs = DummyFieldStorage('a', '1')
     self.assertEqual(d.from_fieldstorage(fs), MultiDict({'a': '1'}))
Example 21
    def search(self,
               constraints=[('project', 'CORDEX')],
               query=None,
               start=None,
               end=None,
               limit=1,
               offset=0,
               search_type='Dataset',
               temporal=False):
        self.show_status("Starting ...", 0)

        from pyesgf.multidict import MultiDict
        my_constraints = MultiDict()
        for key, value in constraints:
            my_constraints.add(key, value)

        LOGGER.debug('constraints=%s', my_constraints)

        if not query or query == '*':
            query = None
        LOGGER.debug('query: %s', query)

        # TODO: check type of start, end
        LOGGER.debug('start=%s, end=%s', start, end)

        ctx = None
        if temporal is True:
            LOGGER.debug("using dataset search with time constraints")
            # TODO: handle timestamps in a better way
            timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
            if start:
                from_timestamp = start.strftime(timestamp_format)
            else:
                from_timestamp = None
            if end:
                to_timestamp = end.strftime(timestamp_format)
            else:
                to_timestamp = None
            LOGGER.debug("from=%s, to=%s", from_timestamp, to_timestamp)
            ctx = self.conn.new_context(fields=self.fields,
                                        replica=self.replica,
                                        latest=self.latest,
                                        query=query,
                                        from_timestamp=from_timestamp,
                                        to_timestamp=to_timestamp)
        else:
            ctx = self.conn.new_context(fields=self.fields,
                                        replica=self.replica,
                                        latest=self.latest,
                                        query=query)
        if len(my_constraints) > 0:
            ctx = ctx.constrain(**my_constraints.mixed())

        LOGGER.debug('ctx: facet_constraints=%s, replica=%s, latest=%s',
                     ctx.facet_constraints, ctx.replica, ctx.latest)

        self.show_status("Datasets found=%d" % ctx.hit_count, 0)

        self.summary = dict(total_number_of_datasets=ctx.hit_count,
                            number_of_datasets=0,
                            number_of_files=0,
                            number_of_aggregations=0,
                            size=0)

        self.result = []

        self.count = 0
        # search datasets
        # we always do this to get the summary document
        datasets = ctx.search(ignore_facet_check=True)

        (self.start_index, self.stop_index,
         self.max_count) = self._index(datasets, limit, offset)
        self.summary['number_of_datasets'] = max(0, self.max_count)

        t0 = datetime.now()
        for i in range(self.start_index, self.stop_index):
            ds = datasets[i]
            # progress = self.count * 100.0 / self.max_count
            self.count = self.count + 1
            self.result.append(ds.json)
            for key in ['number_of_files', 'number_of_aggregations', 'size']:
                # LOGGER.debug(ds.json)
                self.summary[key] = self.summary[key] + ds.json.get(key, 0)

        self.summary['ds_search_duration_secs'] = (datetime.now() - t0).seconds
        self.summary['size_mb'] = self.summary.get('size', 0) / 1024 / 1024
        self.summary['size_gb'] = self.summary.get('size_mb', 0) / 1024

        LOGGER.debug('search_type = %s ', search_type)

        if search_type == 'Dataset':
            pass
        # search files (optional)
        elif search_type == 'File':
            self._file_search(datasets, my_constraints, start, end)
        # search aggregations (optional)
        elif search_type == 'Aggregation':
            self._aggregation_search(datasets, my_constraints)
        else:
            raise Exception('unknown search type: %s' % search_type)

        LOGGER.debug('summary=%s', self.summary)
        self.show_status('Done', 100)

        return (self.result, self.summary, ctx.facet_counts)
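The constraint handling at the top of this method, shown in isolation (a sketch assuming pyesgf-style MultiDict; ctx would be a SearchContext as in the examples above): the list of (facet, value) pairs is folded into a MultiDict whose mixed() form is passed to constrain() as keyword arguments.

from pyesgf.multidict import MultiDict

constraints = [('project', 'CORDEX'), ('experiment', 'historical'), ('experiment', 'rcp26')]
my_constraints = MultiDict()
for key, value in constraints:
    my_constraints.add(key, value)

print(my_constraints.mixed())
# {'project': 'CORDEX', 'experiment': ['historical', 'rcp26']}
# ctx = ctx.constrain(**my_constraints.mixed())  # repeated facets arrive as lists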
Esempio n. 22
0
    def __init__(self,
                 connection,
                 constraints,
                 search_type=None,
                 latest=None,
                 facets=None,
                 fields=None,
                 from_timestamp=None,
                 to_timestamp=None,
                 replica=None,
                 shards=None):
        """

        :param connection: The SearchConnection
        :param constraints: A dictionary of initial constraints
        :param search_type: One of TYPE_* constants defining the document
            type to search for.  Overrides SearchContext.DEFAULT_SEARCH_TYPE
        :param facets: The list of facets for which counts will be retrieved
            and against which constraints will be validated, or None to
            represent all facets.
        :param fields: A list of field names to return in search responses
        :param replica: A boolean defining whether to return master records
            or replicas, or None to return both.
        :param latest: A boolean defining whether to return only latest versions
            or only non-latest versions, or None to return both.
        :param shards: list of shards to restrict searches to.  Should be from the list
            self.connection.get_shard_list()
        :param from_timestamp: Date-time string to specify start of search range 
            (e.g. "2000-01-01T00:00:00Z"). 
        :param to_timestamp: Date-time string to specify end of search range
            (e.g. "2100-12-31T23:59:59Z").

        """

        self.connection = connection
        self.__facet_counts = None
        self.__hit_count = None

        if search_type is None:
            search_type = self.DEFAULT_SEARCH_TYPE

        #  Constraints
        self.freetext_constraint = None
        self.facet_constraints = MultiDict()
        self.temporal_constraint = [from_timestamp, to_timestamp]
        self.geosplatial_constraint = None

        self._update_constraints(constraints)

        # Search configuration parameters
        self.timestamp_range = (from_timestamp, to_timestamp)

        search_types = [TYPE_DATASET, TYPE_FILE, TYPE_AGGREGATION]
        if search_type not in search_types:
            raise EsgfSearchException('search_type must be one of %s' %
                                      ','.join(search_types))
        self.search_type = search_type

        self.latest = latest
        self.facets = facets
        self.fields = fields
        self.replica = replica
        self.shards = shards
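
In practice a SearchContext is usually obtained through SearchConnection.new_context() rather than constructed directly. A minimal usage sketch built on the parameters documented above; the search endpoint URL and constraint values are only examples:

from pyesgf.search import SearchConnection

# Example endpoint; any ESGF node exposing an esg-search service will do.
conn = SearchConnection('http://esgf-data.dkrz.de/esg-search', distrib=True)

# new_context() forwards latest, replica, facets, fields and the temporal
# range to this constructor; extra keyword arguments become facet constraints.
ctx = conn.new_context(project='CORDEX',
                       latest=True,
                       replica=False,
                       from_timestamp='2000-01-01T00:00:00Z',
                       to_timestamp='2010-01-01T00:00:00Z')
print(ctx.hit_count)
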
Esempio n. 23
0
 def test_from_fieldstorage_without_filename(self):
     from pyesgf.multidict import MultiDict
     d = MultiDict()
     fs = DummyFieldStorage('a', '1')
     self.assertEqual(d.from_fieldstorage(fs), MultiDict({'a': '1'}))
Esempio n. 24
0
 def test_kwargs(self):
     from pyesgf.multidict import MultiDict
     md = MultiDict(kw1='val1')
     self.assertEqual(md._items, [('kw1', 'val1')])
Esempio n. 25
0
 def test_no_args(self):
     from pyesgf.multidict import MultiDict
     md = MultiDict()
     self.assertEqual(md._items, [])
Esempio n. 26
0
 def test_view_list(self):
     from pyesgf.multidict import MultiDict
     d = MultiDict()
     self.assertEqual(d.view_list([1, 2])._items, [1, 2])
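
The tests above exercise pyesgf's vendored MultiDict. A short sketch of the behaviour the search() method below relies on, assuming the class behaves like WebOb's MultiDict: add() keeps repeated keys, and mixed() folds them into a plain dict in which duplicated keys map to lists, the form expected by ctx.constrain(**...):

from pyesgf.multidict import MultiDict

md = MultiDict()
md.add('project', 'CORDEX')
md.add('variable', 'tas')
md.add('variable', 'pr')   # repeated key is kept, not overwritten

# mixed() collapses repeated keys into lists; single keys stay scalar.
print(md.mixed())  # {'project': 'CORDEX', 'variable': ['tas', 'pr']}
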
Esempio n. 27
0
    def search(self, constraints=[('project', 'CORDEX')], query=None,
               start=None, end=None, limit=1, offset=0,
               search_type='Dataset',
               temporal=False):
        self.show_status("Starting ...", 0)

        from pyesgf.multidict import MultiDict
        my_constraints = MultiDict()
        for key, value in constraints:
            my_constraints.add(key, value)

        LOGGER.debug('constraints=%s', my_constraints)

        if not query or query == '*':
            query = None
        LOGGER.debug('query: %s', query)

        # TODO: check type of start, end
        LOGGER.debug('start=%s, end=%s', start, end)

        ctx = None
        if temporal is True:
            LOGGER.debug("using dataset search with time constraints")
            # TODO: handle timestamps in a better way
            # timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
            if start:
                # from_timestamp = start.strftime(timestamp_format)
                from_timestamp = '{0}T12:00:00Z'.format(start.isoformat().strip())
            else:
                from_timestamp = None
            if end:
                # to_timestamp = end.strftime(timestamp_format)
                to_timestamp = '{0}T12:00:00Z'.format(end.isoformat().strip())
            else:
                to_timestamp = None
            LOGGER.debug("from=%s, to=%s", from_timestamp, to_timestamp)
            ctx = self.conn.new_context(fields=self.fields,
                                        replica=self.replica,
                                        latest=self.latest,
                                        query=query,
                                        from_timestamp=from_timestamp,
                                        to_timestamp=to_timestamp)
        else:
            ctx = self.conn.new_context(fields=self.fields,
                                        replica=self.replica,
                                        latest=self.latest,
                                        query=query)
        if len(my_constraints) > 0:
            ctx = ctx.constrain(**my_constraints.mixed())

        LOGGER.debug('ctx: facet_constraints=%s, replica=%s, latest=%s',
                     ctx.facet_constraints, ctx.replica, ctx.latest)

        self.show_status("Datasets found=%d" % ctx.hit_count, 0)

        self.summary = dict(total_number_of_datasets=ctx.hit_count,
                            number_of_datasets=0,
                            number_of_files=0,
                            number_of_aggregations=0,
                            size=0)

        self.result = []

        self.count = 0
        # search datasets
        # we always do this to get the summary document
        datasets = ctx.search(ignore_facet_check=True)

        (self.start_index, self.stop_index, self.max_count) = self._index(datasets, limit, offset)
        self.summary['number_of_datasets'] = max(0, self.max_count)

        t0 = datetime.now()
        for i in range(self.start_index, self.stop_index):
            ds = datasets[i]
            # progress = self.count * 100.0 / self.max_count
            self.count = self.count + 1
            self.result.append(ds.json)
            for key in ['number_of_files', 'number_of_aggregations', 'size']:
                # LOGGER.debug(ds.json)
                self.summary[key] = self.summary[key] + ds.json.get(key, 0)

        self.summary['ds_search_duration_secs'] = (datetime.now() - t0).seconds
        self.summary['size_mb'] = self.summary.get('size', 0) / 1024 / 1024
        self.summary['size_gb'] = self.summary.get('size_mb', 0) / 1024

        LOGGER.debug('search_type = %s ', search_type)

        if search_type == 'Dataset':
            pass
        # search files (optional)
        elif search_type == 'File':
            self._file_search(datasets, my_constraints, start, end)
        # search aggregations (optional)
        elif search_type == 'Aggregation':
            self._aggregation_search(datasets, my_constraints)
        else:
            raise Exception('unknown search type: %s' % search_type)

        LOGGER.debug('summary=%s', self.summary)
        self.show_status('Done', 100)

        return (self.result, self.summary, ctx.facet_counts)
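
The size fields in the summary are accumulated in bytes from each dataset's json record and then reduced to MB and GB. A small worked example with illustrative byte counts (note that under Python 2 these divisions are integer divisions):

summary = {'size': 0}
dataset_sizes = [524288000, 1073741824]  # illustrative sizes in bytes

for nbytes in dataset_sizes:
    summary['size'] = summary['size'] + nbytes

summary['size_mb'] = summary.get('size', 0) / 1024 / 1024  # 1524 MB
summary['size_gb'] = summary.get('size_mb', 0) / 1024      # ~1.5 GB (1 with integer division)
print(summary)
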