def set_total_start_rows_attributes(self, solr_json):
    """Sets the total_found, start, rows_attributes"""
    already_set = (
        self.total_found is not None
        and self.start is not None
        and self.rows is not None
    )
    if already_set:
        # These attributes were populated earlier; nothing to do.
        return None
    # numFound lives in the solr response body; start and rows come
    # from the responseHeader parameters.
    raw_values = [
        utilities.get_dict_path_value(path, solr_json)
        for path in (
            ['response', 'numFound'],
            ['responseHeader', 'params', 'start'],
            ['responseHeader', 'params', 'rows'],
        )
    ]
    if any(value is None for value in raw_values):
        # One or more expected values are missing from the response.
        return None
    # Solr may return these as strings or floats; normalize via
    # float then int (matches prior behavior for values like "20.0").
    self.total_found, self.start, self.rows = (
        int(float(value)) for value in raw_values
    )
    return None
def get_record_uuids_from_solr(solr_json):
    """Gets a list of UUIDs from the solr json response"""
    docs = utilities.get_dict_path_value(
        configs.RECORD_PATH_KEYS,
        solr_json,
        default=[]
    )
    uuids = []
    for doc in docs:
        uuid = doc.get('uuid')
        if not uuid:
            # Skip documents that lack a (truthy) uuid value.
            continue
        uuids.append(uuid)
    return uuids
def get_facets_and_options(self, solr_json):
    """Gets property facets and options from solr response json

    :param dict solr_json: The solr response JSON dict.
    :return list facets: List of facet dicts; empty if no facet
        fields are present in the solr response.
    """
    facets = []
    solr_facet_fields_dict = utilities.get_dict_path_value(
        configs.FACETS_SOLR_ROOT_PATH_KEYS,
        solr_json)
    if not solr_facet_fields_dict:
        # No facets active, so skip out
        return facets
    for (
        suffix,
        param_key,
        delim,
        facet_type,
        facet_labeling,
    ) in configs.FACETS_STANDARD:
        for (
            solr_facet_field_key,
            solr_facet_value_count_list,
        ) in solr_facet_fields_dict.items():
            if not solr_facet_field_key.endswith(suffix):
                # This solr field is not of the type for
                # the current suffix.
                continue
            # Make list of the tuples for this solr facet field.
            options_tuples = utilities.get_facet_value_count_tuples(
                solr_facet_value_count_list)
            if not len(options_tuples):
                # Skip, because we don't have any facet options
                continue
            facet = self.make_facet_dict_from_solr_field(
                solr_facet_field_key,
                facet_type,
                facet_labeling,
            )
            if not facet:
                # Log the anomaly (instead of print) so it shows up
                # in normal logging output, consistent with the
                # logger usage elsewhere in this module.
                logger.warning(
                    'Strange. No facet object for %s',
                    solr_facet_field_key
                )
                continue
            # Add options lists for different data-types present in
            # the options tuples list.
            facet = self.add_options_lists_to_facet(
                facet,
                solr_facet_field_key,
                param_key,
                delim,
                options_tuples)
            facets.append(facet)
    return facets
def get_record_uris_from_solr(solr_json):
    """Gets a list of URIs from the solr json response"""
    docs = utilities.get_dict_path_value(
        configs.RECORD_PATH_KEYS,
        solr_json,
        default=[]
    )
    uri_list = []
    for doc in docs:
        entity_str = doc.get('slug_type_uri_label')
        if not entity_str:
            # No encoded entity string to parse on this document.
            continue
        item_dict = utilities.parse_solr_encoded_entity_str(
            entity_str,
        )
        uri_list.append(
            make_url_from_partial_url(item_dict.get('uri', ''))
        )
    return uri_list
def add_publishing_datetime_metadata(self, solr_json, default_to_current=True):
    """Adds publishing modified and created metadata to the response JSON-LD

    :param dict solr_json: The solr response JSON dict.
    :param bool default_to_current: If True, use the current UTC
        time for any metadata value missing from the solr response.
    """
    # NOTE: Solr already defaults to representing time in
    # ISO 8601, so we're just pulling the appropriate time
    # info from the solr_json to metadata fields in our response
    # JSON-LD.
    meta_configs = [
        (
            # Last modified.
            'dcmi:modified',
            (configs.STATS_FIELDS_PATH_KEYS + ['updated', 'max',]),
        ),
        (
            # Last published
            'dcmi:created',
            (configs.STATS_FIELDS_PATH_KEYS + ['published', 'max',]),
        ),
        (
            # Earliest published
            'oai-pmh:earliestDatestamp',
            (configs.STATS_FIELDS_PATH_KEYS + ['published', 'min',]),
        ),
    ]
    for json_ld_key, path_keys_list in meta_configs:
        act_time = utilities.get_dict_path_value(path_keys_list, solr_json)
        if not act_time:
            # We could not find the time object.
            if not default_to_current:
                # Skip, since we're not to default to
                # the current time.
                continue
            # Add ISO 8601 current time for the missing value.
            # Use gmtime(): the 'Z' suffix asserts UTC, but the
            # previous code formatted LOCAL time mislabeled as UTC.
            act_time = time.strftime(
                '%Y-%m-%dT%H:%M:%S', time.gmtime()) + 'Z'
        self.result[json_ld_key] = act_time
def add_form_use_life_date_range(self, solr_json, iso_year_format=True):
    """Adds earliest and latest form-use-life dates"""
    # Map result keys to the solr stats paths holding each date.
    date_key_paths = [
        (
            # Earliest date items formed, used, or alive.
            'start',
            (configs.STATS_FIELDS_PATH_KEYS + [
                'form_use_life_chrono_earliest',
                'min',
            ]),
        ),
        (
            # Latest date items formed, used, or alive
            'stop',
            (configs.STATS_FIELDS_PATH_KEYS + [
                'form_use_life_chrono_latest',
                'max',
            ]),
        ),
    ]
    gathered_dates = []
    for result_key, path_keys in date_key_paths:
        date_value = utilities.get_dict_path_value(path_keys, solr_json)
        if date_value is None:
            # No date present for this key; move on.
            continue
        gathered_dates.append(date_value)
        if iso_year_format:
            # Express the numeric year as an ISO 8601 year string.
            # (date_value is guaranteed non-None here.)
            date_value = ISOyears().make_iso_from_float(date_value)
        self.result[result_key] = date_value
    if not gathered_dates:
        # We found no dates at all, so leave the range unset.
        return None
    # Record the query result's overall minimum and maximum dates.
    self.min_date = min(gathered_dates)
    self.max_date = max(gathered_dates)
def make_item_type_facets(self, solr_json):
    """Makes item_type facets from a solr_json response"""
    val_count_list = utilities.get_dict_path_value(
        (configs.FACETS_SOLR_ROOT_PATH_KEYS + ['item_type']),
        solr_json,
        default=[]
    )
    if not val_count_list:
        return None
    options_tuples = utilities.get_facet_value_count_tuples(
        val_count_list
    )
    if not options_tuples:
        return None
    options = []
    # Build one facet option per recognized item_type value.
    for facet_value, count in options_tuples:
        # get_item_type_dict resolves slugs, full URIs, prefixed
        # URIs, or lower-case item types to a type_dict.
        type_dict = utilities.get_item_type_dict(facet_value)
        if not type_dict:
            # Unrecognized item type. Skip.
            continue
        sl = SearchLinks(
            request_dict=copy.deepcopy(self.request_dict),
            base_search_url=self.base_search_url
        )
        # Drop params unrelated to search queries, then set the
        # 'type' param to this facet option's value.
        sl.remove_non_query_params()
        sl.replace_param_value(
            'type',
            match_old_value=None,
            new_value=facet_value,
        )
        urls = sl.make_urls_from_request_dict()
        if urls['html'] == self.current_filters_url:
            # This option would merely repeat the currently
            # applied filters, so leave it out.
            continue
        option = LastUpdatedOrderedDict()
        option['id'] = urls['html']
        option['json'] = urls['json']
        for key, val in type_dict.items():
            option[key] = val
        options.append(option)
    if not options:
        return None
    facet = configs.FACETS_ITEM_TYPE.copy()
    facet['oc-api:has-id-options'] = options
    return facet
def get_facet_ranges_and_options(self, solr_json):
    """Gets property range facets and options from solr response json

    :param dict solr_json: The solr response JSON dict.
    :return list facet_ranges: List of range-facet dicts; an empty
        list if no range facets are active; None if range facets
        exist but their stats fields are missing.
    """
    facet_ranges = []
    solr_facet_ranges_dict = utilities.get_dict_path_value(
        configs.FACETS_RANGE_SOLR_ROOT_PATH_KEYS,
        solr_json)
    if not solr_facet_ranges_dict:
        # No facets ranges active, so skip out
        return facet_ranges
    # Now get the related stats fields (min/max/mean/stddev per
    # solr field); needed to describe each range facet.
    solr_stats_dict = utilities.get_dict_path_value(
        configs.STATS_FIELDS_PATH_KEYS,
        solr_json)
    if not solr_stats_dict:
        # No solr stats. So skip out.
        # NOTE(review): this returns None, while the empty-facets
        # case above returns [] — callers appear to tolerate both.
        return None
    for (
        solr_field_key,
        range_dict,
    ) in solr_facet_ranges_dict.items():
        # Look up the client's request parameter and request value
        # associated with this solr field.
        (
            param_key,
            match_old_value,
        ) = self.facet_fields_to_client_request.get(
            solr_field_key,
            (
                'prop',
                None,
            )  # default parameter, matching value.
        )
        if not match_old_value:
            # This should never happen, but we can't associate
            # this solr field with a client request param and value
            continue
        # Parse the solr field to get the data type
        data_type = utilities.get_data_type_for_solr_field(solr_field_key)
        if data_type not in [
            'xsd:integer',
            'xsd:double',
            'xsd:date',
        ]:
            # The data type for solr field is missing or
            # is of a type that does not have ranges.
            continue
        stats_dict = solr_stats_dict.get(solr_field_key)
        if not stats_dict:
            # We can't find stats for this solr field
            # that gave us ranges. Which should not happen, but
            # it did, so skip.
            continue
        # Get the raw list of value counts
        range_value_count_list = range_dict.get('counts', [])
        # Make list of the tuples for this solr facet field.
        options_tuples = utilities.get_facet_value_count_tuples(
            range_value_count_list)
        if not len(options_tuples):
            # Skip, because we don't have any facet range options
            continue
        facet_range = self.make_facet_dict_from_solr_field(
            solr_field_key,
            'oc-api:range-facet',
            'Ranges',
            range_data_type=data_type,
        )
        # Copy summary statistics onto the facet as oc-api keys.
        for key in ['min', 'max', 'mean', 'stddev']:
            facet_range['oc-api:{}'.format(key)] = stats_dict[key]
        # The 'gap' comes from the range config, not the stats.
        for key in ['gap']:
            facet_range['oc-api:{}'.format(key)] = range_dict[key]
        round_digits = None
        if data_type == 'xsd:double':
            # For floats, round option boundaries to the coarser of
            # the min/max rounding levels.
            digits = [
                utilities.get_rounding_level_from_float(stats_dict[key])
                for key in ['min', 'max']
            ]
            round_digits = max(digits)
        # Now add the links to the different range options.
        facet_range[
            'oc-api:has-range-options'] = self.add_range_options_list(
            param_key,
            match_old_value,
            data_type,
            stats_dict['max'],
            options_tuples,
            round_digits=round_digits,
        )
        facet_ranges.append(facet_range)
    return facet_ranges
def make_chronology_facet_options(self, solr_json):
    """Makes chronology facets from a solr_json response

    :param dict solr_json: The solr response JSON dict.
    :return list options: Chronology facet option dicts, sorted with
        early, long-timespan ranges first; None if no valid
        chronological tiles exist in the response.
    """
    chrono_path_keys = (
        configs.FACETS_SOLR_ROOT_PATH_KEYS + ['form_use_life_chrono_tile']
    )
    chrono_val_count_list = utilities.get_dict_path_value(
        chrono_path_keys,
        solr_json,
        default=[]
    )
    if not len(chrono_val_count_list):
        return None
    options_tuples = utilities.get_facet_value_count_tuples(
        chrono_val_count_list
    )
    if not len(options_tuples):
        return None
    # Check to see if the client included any request parameters
    # that limited the chronological range of the request.
    self._set_client_earliest_latest_limits()
    valid_tile_dicts = self._make_valid_options_tile_dicts(
        options_tuples
    )
    if not len(valid_tile_dicts):
        # None of the chronological tiles are valid
        # given the query requirements.
        return None
    # Determine the aggregation depth needed to group chronological
    # tiles together into a reasonable number of options.
    # (Sets self.default_aggregation_depth as a side effect.)
    self._get_tile_aggregation_depth(valid_tile_dicts)
    aggregate_tiles = {}
    for tile_dict in valid_tile_dicts:
        # Aggregate tiles by truncating tile keys to the chosen
        # depth; tiles sharing a prefix merge into one option.
        trim_tile_key = tile_dict['tile_key'][:self.default_aggregation_depth]
        if trim_tile_key not in aggregate_tiles:
            # Make the aggregate tile dictionary
            # object.
            chrono_t = ChronoTile()
            agg_dict = chrono_t.decode_path_dates(trim_tile_key)
            if (self.min_date is not None
                and agg_dict['earliest_bce'] < self.min_date):
                # The aggregated date range looks too early, so
                # clamp it to the earliest allowed.
                agg_dict['earliest_bce'] = self.min_date
            if (self.max_date is not None
                and agg_dict['latest_bce'] > self.max_date):
                # The aggregated date range looks too late, so
                # clamp it to the latest date range allowed.
                agg_dict['latest_bce'] = self.max_date
            agg_dict['tile_key'] = trim_tile_key
            agg_dict['count'] = 0
            aggregate_tiles[trim_tile_key] = agg_dict
        # Sum counts from every tile merged into this aggregate.
        aggregate_tiles[trim_tile_key]['count'] += tile_dict['count']
    agg_tile_list = [tile_dict for _, tile_dict in aggregate_tiles.items()]
    # Now sort by earliest bce, then reversed latest bce.
    # This puts early dates with longest timespans first.
    sorted_agg_tiles = sorted(
        agg_tile_list,
        key=lambda k: (k['earliest_bce'], -k['latest_bce'])
    )
    options = []
    for tile_dict in sorted_agg_tiles:
        sl = SearchLinks(
            request_dict=copy.deepcopy(self.request_dict),
            base_search_url=self.base_search_url
        )
        # Remove non search related params.
        sl.remove_non_query_params()
        # Update the request dict for this facet option: the tile
        # key plus its decoded start/stop years.
        sl.replace_param_value(
            'form-chronotile',
            match_old_value=None,
            new_value=tile_dict['tile_key'],
        )
        sl.replace_param_value(
            'form-start',
            match_old_value=None,
            new_value=tile_dict['earliest_bce'],
        )
        sl.replace_param_value(
            'form-stop',
            match_old_value=None,
            new_value=tile_dict['latest_bce'],
        )
        urls = sl.make_urls_from_request_dict()
        if urls['html'] == self.current_filters_url:
            # The new URL matches our current filter
            # url, so don't add this facet option.
            continue
        option = LastUpdatedOrderedDict()
        option['id'] = urls['html']
        option['json'] = urls['json']
        option['count'] = tile_dict['count']
        option['category'] = 'oc-api:chrono-facet'
        # ISO 8601 year strings for the range boundaries.
        option['start'] = ISOyears().make_iso_from_float(
            tile_dict['earliest_bce']
        )
        option['stop'] = ISOyears().make_iso_from_float(
            tile_dict['latest_bce']
        )
        properties = LastUpdatedOrderedDict()
        properties['early bce/ce'] = tile_dict['earliest_bce']
        properties['late bce/ce'] = tile_dict['latest_bce']
        option['properties'] = properties
        options.append(option)
    return options
def make_records_from_solr(self, solr_json):
    """Makes record objects from solr_json

    :param dict solr_json: The solr response JSON dict.
    :return list records: List of ResultRecord objects built from
        the solr response documents (empty if no docs).
    """
    records = []
    doc_list = utilities.get_dict_path_value(
        configs.RECORD_PATH_KEYS,
        solr_json,
        default=[])
    if not len(doc_list):
        return records
    # Gather the slugs for additional descriptive attributes
    # that we will add to the result records.
    requested_attrib_slugs = self._gather_requested_attrib_slugs()
    # Get the keyword search highlighting dict. Default
    # to an empty dict if there's no snippet highlighting.
    highlight_dict = solr_json.get('highlighting', {})
    # Accumulators for a second, batched lookup pass below.
    uuids = []
    geo_uuids = []
    string_pred_uuids = []
    records = []
    for solr_doc in doc_list:
        if not solr_doc.get('uuid'):
            # This shouldn't happen...
            logger.warn('Solr doc without a uuid. How?')
            continue
        # Create a result record object by processing the
        # solr_doc for the result item.
        rr = ResultRecord(solr_doc)
        rr.flatten_attributes = self.flatten_attributes
        rr.add_snippet_content(highlight_dict)
        uuids.append(rr.uuid)
        geo_uuid = get_geo_discovery_source_uuid(solr_doc)
        if geo_uuid == rr.uuid:
            # We only need to add geospatial feature
            # data if the disc_geosource is actually the
            # same item as the result record. Otherwise,
            # we will simply use the item's point data
            # to locate it.
            geo_uuids.append(geo_uuid)
        # Get all the linked data (standards) attributes
        # for this record.
        rec_ld_attributes = get_linked_data_attributes(solr_doc)
        # Only add those linked data (standards) attributes
        # that meet our limiting criteria.
        rr.ld_attributes = self._limit_attributes_by_request(
            desolr_attribute_tuples_slugs(rec_ld_attributes),
            requested_attrib_slugs,
            all_attribute_val=configs.REQUEST_ALL_LD_ATTRIBUTES)
        # Get all of the project-specific predicate attributes
        # for this result record.
        rec_pred_attributes = get_predicate_attributes(solr_doc)
        # Only add those project-specific predicate attributes
        # to the result record object that meet our limiting
        # criteria.
        rr.pred_attributes = self._limit_attributes_by_request(
            desolr_attribute_tuples_slugs(rec_pred_attributes),
            requested_attrib_slugs,
            all_attribute_val=configs.REQUEST_ALL_PROJ_ATTRIBUTES)
        # Add to the list of string predicate uuids gathered
        # from the attributes describing this record.
        string_pred_uuids += get_attribute_tuples_string_pred_uuids(
            rr.pred_attributes)
        # Add the result record object to the list of records.
        records.append(rr)
    # Remove the duplicates.
    string_pred_uuids = list(set(string_pred_uuids))
    # Make a query to get a dict associating record uuids, string
    # predicate uuids, and their string content.
    # (Batched here so we query once, not per record.)
    uuid_pred_str_dict = self._get_string_attribute_values(
        uuids, string_pred_uuids)
    # Make a query to get any non-point geospatial feature data
    # associated with these result records.
    uuid_geo_dict = self._get_geo_features_objs(geo_uuids)
    # Second pass: attach the batched lookup results to each record.
    for rr in records:
        rr.add_string_content(uuid_pred_str_dict)
        rr.add_non_point_geojson_coordinates(uuid_geo_dict)
    return records
def make_geo_contained_in_facet_options(self, solr_json):
    """Gets geospace item query set from a list of options tuples

    :param dict solr_json: The solr response JSON dict.
    :return list geo_options: GeoJSON 'Feature' option dicts for
        containing regions; None if no disc_geosource facet values
        are present.
    """
    geosource_path_keys = (
        configs.FACETS_SOLR_ROOT_PATH_KEYS + ['disc_geosource'])
    geosource_val_count_list = utilities.get_dict_path_value(
        geosource_path_keys,
        solr_json,
        default=[])
    if not len(geosource_val_count_list):
        return None
    # Make the list of tile, count tuples.
    options_tuples = utilities.get_facet_value_count_tuples(
        geosource_val_count_list)
    if not len(options_tuples):
        return None
    # First pass: parse each solr entity string and collect the
    # uuids so the geospace/context lookups below can be batched.
    uuids = []
    parsed_solr_entities = {}
    uuid_geo_dict = {}
    for solr_entity_str, count in options_tuples:
        parsed_entity = utilities.parse_solr_encoded_entity_str(
            solr_entity_str, base_url=self.base_url)
        if not parsed_entity:
            logger.warn(
                'Cannot parse entity from {}'.format(solr_entity_str))
            continue
        if not '/' in parsed_entity['uri']:
            logger.warn('Invalid uri from {}'.format(solr_entity_str))
            continue
        # The uuid is the last path segment of the entity URI.
        uri_parts = parsed_entity['uri'].split('/')
        uuid = uri_parts[-1]
        parsed_entity['uuid'] = uuid
        parsed_solr_entities[solr_entity_str] = parsed_entity
        uuids.append(uuid)
    # Make a dictionary of geospace objects keyed by uuid. This
    # will hit the database in one query to get all geospace
    # objects not present in the cache.
    uuid_geo_dict = self._make_cache_geospace_obj_dict(uuids)
    # Make a dict of context paths, keyed by uuid. This will also
    # hit the database in only 1 query, for all context paths not
    # already present in the cache.
    uuid_context_dict = self._get_cache_contexts_dict(uuids)
    # Second pass: build a GeoJSON feature option per valid entity.
    geo_options = []
    for solr_entity_str, count in options_tuples:
        if solr_entity_str not in parsed_solr_entities:
            # This solr_entity_str did not validate to extract a UUID.
            continue
        parsed_entity = parsed_solr_entities[solr_entity_str]
        uuid = parsed_entity['uuid']
        geo_obj = uuid_geo_dict.get(uuid)
        if geo_obj is None:
            logger.warn('No geospace object for {}'.format(uuid))
            continue
        context_path = uuid_context_dict.get(uuid)
        if context_path is None:
            logger.warn('No context path for {}'.format(uuid))
            continue
        sl = SearchLinks(
            request_dict=copy.deepcopy(self.request_dict),
            base_search_url=self.base_search_url)
        # Remove non search related params.
        sl.remove_non_query_params()
        # Update the request dict for this facet option.
        sl.replace_param_value(
            'path',
            match_old_value=None,
            new_value=context_path,
        )
        urls = sl.make_urls_from_request_dict()
        # NOTE: We're not checking if the URLs are the same
        # as the current search URL, because part of the point
        # of listing these features is for visualization display
        # in the front end.
        option = LastUpdatedOrderedDict()
        # The fragment id in the URLs are so we don't have an
        # ID collision with context facets.
        option['id'] = urls['html'] + '#geo-in'
        option['json'] = urls['json'] + '#geo-in'
        option['count'] = count
        option['type'] = 'Feature'
        option['category'] = 'oc-api:geo-contained-in-feature'
        # Add some general chronology information to the
        # geospatial feature.
        option = self._add_when_object_to_feature_option(
            uuid,
            option,
        )
        # Add the geometry from the geo_obj coordinates. First
        # check to make sure they are OK with the GeoJSON
        # right-hand rule.
        geometry = LastUpdatedOrderedDict()
        geometry['id'] = '#geo-in-geom-{}'.format(uuid)
        geometry['type'] = geo_obj.ftype
        # geo_obj.coordinates is stored as a JSON string; decode it.
        coord_obj = json.loads(geo_obj.coordinates)
        v_geojson = ValidateGeoJson()
        coord_obj = v_geojson.fix_geometry_rings_dir(
            geo_obj.ftype, coord_obj)
        geometry['coordinates'] = coord_obj
        option['geometry'] = geometry
        properties = LastUpdatedOrderedDict()
        properties['id'] = '#geo-in-props-{}'.format(uuid)
        properties['href'] = option['id']
        properties['item-href'] = parsed_entity['uri']
        properties['label'] = context_path
        properties['feature-type'] = 'containing-region'
        properties['count'] = count
        # Date range of the overall query result, set elsewhere.
        properties['early bce/ce'] = self.min_date
        properties['late bce/ce'] = self.max_date
        option['properties'] = properties
        geo_options.append(option)
    return geo_options
def make_geotile_facet_options(self, solr_json):
    """Makes geographic tile facets from a solr_json response

    :param dict solr_json: The solr response JSON dict.
    :return list options: GeoJSON 'Feature' option dicts, one per
        aggregated geotile; None if no valid discovery geotiles
        are present in the response.
    """
    geotile_path_keys = (
        configs.FACETS_SOLR_ROOT_PATH_KEYS + ['discovery_geotile'])
    geotile_val_count_list = utilities.get_dict_path_value(
        geotile_path_keys,
        solr_json,
        default=[])
    if not len(geotile_val_count_list):
        return None
    # Make the list of tile, count tuples.
    options_tuples = utilities.get_facet_value_count_tuples(
        geotile_val_count_list)
    if not len(options_tuples):
        return None
    valid_tile_tuples = self._make_valid_options_tile_tuples(
        options_tuples)
    if not len(valid_tile_tuples):
        # None of the geo tiles are valid
        # given the query requirements.
        return None
    # Determine the aggregation depth needed to group geotiles
    # together into a reasonable number of options.
    # (Sets self.default_aggregation_depth as a side effect.)
    self._get_tile_aggregation_depth(valid_tile_tuples)
    # Determine the min tile depth. We need to return this to
    # the client so the client knows not to over-zoom.
    tile_lens = [len(tile) for tile, _ in valid_tile_tuples]
    self.min_depth = min(tile_lens)
    # Get the client's requested feature type for the geotile
    # facets.
    feature_type = utilities.get_request_param_value(
        self.request_dict,
        param='geo-facet-type',
        default=self.default_tile_feature_type,
        as_list=False,
        solr_escape=False,
    )
    if feature_type not in self.valid_tile_feature_types:
        # If the requested feature type is not in the
        # valid list of feature types, just use the default.
        feature_type = self.default_tile_feature_type
    # Aggregate tile counts by truncating tile keys to the
    # aggregation depth; shared prefixes merge into one option.
    aggregate_tiles = {}
    for tile, count in valid_tile_tuples:
        trim_tile_key = tile[:self.default_aggregation_depth]
        if trim_tile_key not in aggregate_tiles:
            # Make the aggregate tile with a count
            # of zero
            aggregate_tiles[trim_tile_key] = 0
        aggregate_tiles[trim_tile_key] += count
    options = []
    for tile, count in aggregate_tiles.items():
        sl = SearchLinks(
            request_dict=copy.deepcopy(self.request_dict),
            base_search_url=self.base_search_url)
        # Remove non search related params.
        sl.remove_non_query_params()
        # Update the request dict for this facet option.
        sl.replace_param_value(
            'disc-geotile',
            match_old_value=None,
            new_value=tile,
        )
        urls = sl.make_urls_from_request_dict()
        if urls['html'] == self.current_filters_url:
            # The new URL matches our current filter
            # url, so don't add this facet option.
            continue
        option = LastUpdatedOrderedDict()
        option['id'] = urls['html']
        option['json'] = urls['json']
        option['count'] = count
        option['type'] = 'Feature'
        option['category'] = 'oc-api:geo-facet'
        # Add some general chronology information to the
        # geospatial tile.
        option = self._add_when_object_to_feature_option(
            tile,
            option,
        )
        gm = GlobalMercator()
        if feature_type == 'Polygon':
            # Get polygon coordinates (a list of lists)
            geo_coords = gm.quadtree_to_geojson_poly_coords(tile)
        elif feature_type == 'Point':
            # Get point coordinates (a list of lon,lat values)
            geo_coords = gm.quadtree_to_geojson_lon_lat(tile)
        else:
            # We shouldn't be here! (feature_type was validated
            # above, so this is purely defensive.)
            continue
        # Add the geometry object to the facet option.
        geometry = LastUpdatedOrderedDict()
        geometry['id'] = '#geo-disc-tile-geom-{}'.format(tile)
        geometry['type'] = feature_type
        geometry['coordinates'] = geo_coords
        option['geometry'] = geometry
        properties = LastUpdatedOrderedDict()
        properties['id'] = '#geo-disc-tile-{}'.format(tile)
        properties['href'] = option['id']
        # Label is 1-based on the number of options added so far.
        properties['label'] = 'Discovery region ({})'.format(
            (len(options) + 1))
        properties['feature-type'] = 'discovery region (facet)'
        properties['count'] = count
        # Date range of the overall query result, set elsewhere.
        properties['early bce/ce'] = self.min_date
        properties['late bce/ce'] = self.max_date
        option['properties'] = properties
        options.append(option)
    return options