Exemple #1
0
def get_discovery_bbox_query_dict(raw_disc_bbox):
    """Build a solr filter query for discovery location bounding boxes.

    The raw request value is split into individual bounding box strings
    on the request OR operator. Each string is validated into
    coordinates and converted into a range term on the
    ``discovery_geolocation`` field. All range terms are then joined
    with the OR operator.

    :param raw_disc_bbox: Raw bounding box value from the client
        request; it may hold several boxes separated by the OR operator.
    :return: A dict with an 'fq' list holding the single joined term,
        or None as soon as one bounding box fails validation.
    """
    bbox_strings = utilities.infer_multiple_or_hierarchy_paths(
        raw_disc_bbox, or_delim=configs.REQUEST_OR_OPERATOR)

    range_terms = []
    for bbox_string in bbox_strings:
        coords = utilities.return_validated_bbox_coords(bbox_string)
        if not coords:
            # One invalid bounding box invalidates the whole filter.
            return None
        # NOTE: Solr expects latitude before longitude, which is the
        # reverse of the GeoJSON ordering. So the coordinates at index
        # 1 and 3 fill the latitude slots, 0 and 2 the longitude slots.
        solr_range = '[{lat_0},{lon_0} TO {lat_1},{lon_1}]'.format(
            lat_0=coords[1],
            lon_0=coords[0],
            lat_1=coords[3],
            lon_1=coords[2],
        )
        range_terms.append('discovery_geolocation: ' + solr_range)

    # Multiple bounding boxes are alternatives, so OR them together.
    joined_term = utilities.join_solr_query_terms(range_terms, operator='OR')
    return {'fq': [joined_term]}
Exemple #2
0
def get_object_uri_query_dict(raw_object_uri):
    """Build a solr filter query for object URIs.

    The raw request value is split on the request OR operator. Every
    non-empty value is expanded with
    ``utilities.make_uri_equivalence_list`` and each resulting
    identifier becomes one escaped ``object_uri`` term. Duplicate terms
    are dropped and the remainder joined with the OR operator.

    :param raw_object_uri: Raw object URI value from the client request.
    :return: A dict with an 'fq' list holding the single joined term,
        or None if raw_object_uri is empty.
    """
    if not raw_object_uri:
        return None

    uri_values = utilities.infer_multiple_or_hierarchy_paths(
        raw_object_uri, or_delim=configs.REQUEST_OR_OPERATOR)

    # Collect the equivalent identifiers for every requested URI.
    equivalent_ids = []
    for uri_value in uri_values:
        if uri_value:
            equivalent_ids.extend(
                utilities.make_uri_equivalence_list(uri_value))

    or_terms = []
    for equivalent_id in equivalent_ids:
        # The identifier may be a persistent URI, so escape it before
        # putting it into the query term.
        term = 'object_uri:{}'.format(
            utilities.escape_solr_arg(equivalent_id))
        if term not in or_terms:
            or_terms.append(term)

    # The different object URIs are alternatives, so OR them together.
    joined_term = utilities.join_solr_query_terms(or_terms, operator='OR')
    return {'fq': [joined_term]}
def test_infer_multiple_or_hierarchy_paths():
    """Check paths inferred from raw paths that contain OR operators.

    Every entry of TESTS_MULTIPLE_OR_PATHS supplies a raw path, the
    expected list of paths, a hierarchy delimiter and an OR delimiter.
    """
    for test_case in TESTS_MULTIPLE_OR_PATHS:
        raw_path, exp_paths, hierarchy_delim, or_delim = test_case
        inferred_paths = utilities.infer_multiple_or_hierarchy_paths(
            raw_path,
            hierarchy_delim=hierarchy_delim,
            or_delim=or_delim,
        )
        assert inferred_paths == exp_paths
Exemple #4
0
def get_general_hierarchic_paths_query_dict(
    raw_path,
    root_field,
    field_suffix,
    hierarchy_delim=configs.REQUEST_PROP_HIERARCHY_DELIM,
    or_delim=configs.REQUEST_OR_OPERATOR,
    obj_all_slug='',
    attribute_field_part='',
):
    """Build a solr query dict for a raw hierarchic path with OR options.

    The raw path is expanded into one list per alternative path. Each
    path list is passed to ``get_general_hierarchic_path_query_dict``.
    The 'fq' terms of one path are joined with AND, and the resulting
    per-path terms are joined with OR. All other keys of the per-path
    query dicts are merged into the returned dict.

    :param raw_path: Raw hierarchic path string from the client request.
    :param root_field: Passed through to the single path query builder.
    :param field_suffix: Passed through to the single path query builder.
    :param hierarchy_delim: Delimiter between levels of a path.
    :param or_delim: Delimiter between alternative (OR) values.
    :param obj_all_slug: Passed through to the single path query builder.
    :param attribute_field_part: Passed through to the single path
        query builder.
    :return: A query dict whose 'fq' list holds the single combined
        term, or None if raw_path is empty or no path yields a query.
    """
    if not raw_path:
        return None

    combined_dict = {'fq': [], 'facet.field': []}
    or_terms = []
    for path_list in utilities.infer_multiple_or_hierarchy_paths(
        raw_path,
        hierarchy_delim=hierarchy_delim,
        or_delim=or_delim,
        get_paths_as_lists=True,
    ):
        single_path_dict = get_general_hierarchic_path_query_dict(
            path_list,
            root_field=root_field,
            field_suffix=field_suffix,
            obj_all_slug=obj_all_slug,
            attribute_field_part=attribute_field_part,
        )
        if not single_path_dict:
            # No query could be made for this path, so skip it.
            continue
        # Every term made from one hierarchic path must be satisfied
        # together, so they are joined with AND into a single string.
        or_terms.append(
            utilities.join_solr_query_terms(
                single_path_dict['fq'], operator='AND'))
        # Merge everything except 'fq' into the combined dict; the
        # 'fq' values were already folded into the term above.
        combined_dict = utilities.combine_query_dict_lists(
            part_query_dict=single_path_dict,
            main_query_dict=combined_dict,
            skip_keys=['fq'],
        )

    if not or_terms:
        return None

    # The different paths are alternatives (a union), so OR them.
    combined_dict['fq'] = [
        utilities.join_solr_query_terms(or_terms, operator='OR')
    ]
    return combined_dict
    def _gather_requested_attrib_slugs(self):
        """Make a de-duplicated list of requested attribute slugs.

        Slugs come from two request parameters read from
        self.request_dict:

        * 'prop': each value may combine OR conditions and hierarchy
          delimiters; every slug found in every path is collected.
        * 'attributes': a single string that is split on
          configs.MULTIVALUE_ATTRIB_CLIENT_DELIM.

        :return: A list of unique slugs. The list is built from a set,
            so the order of the slugs is not guaranteed.
        """
        requested_attrib_slugs = []

        # Get all of the prop parameter values requested
        # by the client from the self.request_dict.
        raw_props_paths = utilities.get_request_param_value(
            self.request_dict,
            param='prop',
            default=[],
            as_list=True,
            solr_escape=False,
        )
        for raw_prop_path in raw_props_paths:
            # These can have OR conditions along with hierarchy
            # delimiters, so split these apart to get lists of slugs.
            # Each path must come back as a list of slugs: extending
            # requested_attrib_slugs with a path string would add its
            # individual characters instead of slugs.
            paths_as_lists = utilities.infer_multiple_or_hierarchy_paths(
                raw_prop_path,
                hierarchy_delim=configs.REQUEST_PROP_HIERARCHY_DELIM,
                or_delim=configs.REQUEST_OR_OPERATOR,
                get_paths_as_lists=True,
            )
            for path_list in paths_as_lists:
                # Add the elements of this list to the list
                # of requested_attrib_slugs. Some of these won't be
                # 'property' (predicate) attributes, but that doesn't
                # matter. It's OK to have some noise in the
                # requested_attrib_slugs.
                requested_attrib_slugs += path_list

        # De-duplicate the slugs in the requested_attrib_slugs.
        requested_attrib_slugs = list(set(requested_attrib_slugs))

        raw_attributes = utilities.get_request_param_value(
            self.request_dict,
            param='attributes',
            default=None,
            as_list=False,
            solr_escape=False,
        )
        if not raw_attributes:
            # The client did not request additional attributes.
            return requested_attrib_slugs

        if configs.MULTIVALUE_ATTRIB_CLIENT_DELIM not in raw_attributes:
            attrib_list = [raw_attributes]
        else:
            attrib_list = raw_attributes.split(
                configs.MULTIVALUE_ATTRIB_CLIENT_DELIM)

        # Merge in the explicitly requested attributes and
        # de-duplicate the combined list.
        requested_attrib_slugs = list(set(requested_attrib_slugs +
                                          attrib_list))
        return requested_attrib_slugs
Exemple #6
0
def get_spatial_context_query_dict(spatial_context=None):
    """Build a query dict for a spatial context path.

    Without a spatial context, the returned dict has no filter queries
    and asks for facets on SolrDocument.ROOT_CONTEXT_SOLR. Otherwise
    the context path is expanded into its OR alternatives, the paths
    are resolved to slugs with ``get_valid_context_slugs``, and each
    slug contributes one filter term plus one facet field.

    :param str spatial_context: Raw spatial context path requested by
        the client.
    :return: A dict with 'fq' and 'facet.field' lists.
    """
    if not spatial_context:
        return {
            'fq': [],
            'facet.field': [SolrDocument.ROOT_CONTEXT_SOLR],
        }

    # Several paths mean an "OR" query: the client requests the union
    # of the different context paths.
    context_paths = utilities.infer_multiple_or_hierarchy_paths(
        spatial_context,
        hierarchy_delim=configs.REQUEST_CONTEXT_HIERARCHY_DELIM,
        or_delim=configs.REQUEST_OR_OPERATOR)

    context_terms = []
    child_facet_fields = []
    for context_slug in get_valid_context_slugs(context_paths):
        parent_slug = get_containment_parent_slug(context_slug)
        if not parent_slug:
            # Odd case of a slug without a parent slug. Skip it so it
            # cannot cause an error or strange behavior.
            continue
        context_terms.append(
            utilities.make_solr_term_via_slugs(
                field_slug=parent_slug,
                solr_dyn_field=SolrDocument.FIELD_SUFFIX_CONTEXT,
                value_slug=context_slug,
            )
        )
        # Facet on this context's own field so solr calculates facets
        # for child contexts contained inside it.
        child_facet_fields.append(
            context_slug.replace('-', '_')
            + SolrDocument.SOLR_VALUE_DELIM
            + SolrDocument.FIELD_SUFFIX_CONTEXT
        )

    # NOTE: Multiple terms result from an "OR" (||) operator in the
    # client's request.
    joined_term = utilities.join_solr_query_terms(
        context_terms, operator='OR')
    return {'fq': [joined_term], 'facet.field': child_facet_fields}
Exemple #7
0
def make_tile_query_dict(raw_tile_path, solr_field, max_path_length):
    """Build a filter query for a general tile path (geo or chrono).

    The raw tile path is split on the request OR operator. A path
    shorter than max_path_length gets a trailing '*' wildcard appended.
    Each path becomes one term on solr_field and the terms are joined
    with the OR operator.

    :param raw_tile_path: Raw tile path value from the client request.
    :param solr_field: Solr field to query and to facet on.
    :param max_path_length: Paths shorter than this length are queried
        with a trailing wildcard.
    :return: A dict with an 'fq' list holding the single joined term
        and a 'facet.field' list holding solr_field.
    """
    tile_paths = utilities.infer_multiple_or_hierarchy_paths(
        raw_tile_path, or_delim=configs.REQUEST_OR_OPERATOR)

    tile_terms = []
    for tile_path in tile_paths:
        wildcard = '*' if len(tile_path) < max_path_length else ''
        tile_terms.append('{}:{}'.format(solr_field, tile_path + wildcard))

    # The different tile paths are alternatives, so OR them together.
    joined_term = utilities.join_solr_query_terms(tile_terms, operator='OR')
    return {'fq': [joined_term], 'facet.field': [solr_field]}
Exemple #8
0
def get_simple_metadata_query_dict(raw_value, solr_field):
    """Build a query dict for a simple, standard metadata solr field.

    The raw value is split on the request OR operator and every
    non-empty value becomes one ``solr_field:value`` term. The terms
    are joined with the OR operator.

    :param raw_value: Raw value from the client request.
    :param solr_field: Name of the solr field to query.
    :return: A dict with an 'fq' list holding the single joined term,
        or None if raw_value is empty.
    """
    if not raw_value:
        return None

    requested_values = utilities.infer_multiple_or_hierarchy_paths(
        raw_value, or_delim=configs.REQUEST_OR_OPERATOR)
    or_terms = [
        '{}:{}'.format(solr_field, requested_value)
        for requested_value in requested_values
        if requested_value
    ]

    # The different values are alternatives, so OR them together.
    joined_term = utilities.join_solr_query_terms(or_terms, operator='OR')
    return {'fq': [joined_term]}
Exemple #9
0
def get_item_type_query_dict(raw_item_type):
    """Build a query dict for item types.

    The raw value is split on the request OR operator. Item types
    without an entry in configs.ITEM_TYPE_SLUG_MAPPINGS are ignored.
    Every mapped item type adds one ``item_type`` filter term and one
    facet field derived from its slug.

    :param raw_item_type: Raw item type value from the client request.
    :return: A dict with an 'fq' list holding the single joined term
        and a 'facet.field' list with one field per mapped item type.
    """
    requested_types = utilities.infer_multiple_or_hierarchy_paths(
        raw_item_type, or_delim=configs.REQUEST_OR_OPERATOR)

    type_terms = []
    facet_fields = []
    for item_type in requested_types:
        type_slug = configs.ITEM_TYPE_SLUG_MAPPINGS.get(item_type)
        if type_slug:
            # The filter term uses the requested item type itself.
            type_terms.append('item_type:{}'.format(item_type))
            # The facet field is built from the mapped slug, so solr
            # calculates facets for this item type.
            facet_fields.append(
                type_slug.replace('-', '_')
                + SolrDocument.SOLR_VALUE_DELIM
                + SolrDocument.FIELD_SUFFIX_PREDICATE
            )

    # NOTE: Multiple item_type terms result from an "OR" (||) operator
    # in the client's request.
    joined_term = utilities.join_solr_query_terms(type_terms, operator='OR')
    return {'fq': [joined_term], 'facet.field': facet_fields}
Exemple #10
0
def get_identifier_query_dict(raw_identifier):
    """Build a solr filter query for identifiers.

    The raw value is split on the request OR operator (no hierarchy
    delimiter). Terms are gathered in two passes and joined with OR:

    1. Every non-empty value is expanded with
       ``utilities.make_uri_equivalence_list``. Each identifier adds an
       escaped ``persistent_uri`` term and, when it has no ':', also a
       ``uuid`` term and a ``slug_type_uri_label`` term.
    2. Known identifier prefixes are stripped from every non-empty
       value. The stripped identifier is put into URI templates to add
       further ``persistent_uri`` terms. A ``uuid`` term is added when
       the value can be read as an Open Context URI.

    :param raw_identifier: Raw identifier value from the client request.
    :return: A dict with an 'fq' list holding the single joined term,
        or None if raw_identifier is empty.
    """
    if not raw_identifier:
        return None

    id_values = utilities.infer_multiple_or_hierarchy_paths(
        raw_identifier,
        or_delim=configs.REQUEST_OR_OPERATOR,
        hierarchy_delim=None)

    equivalent_ids = []
    for id_value in id_values:
        if id_value:
            equivalent_ids.extend(
                utilities.make_uri_equivalence_list(id_value))

    or_terms = []
    for equivalent_id in equivalent_ids:
        # The identifier may be a persistent URI, so escape it and
        # query the persistent_uri string.
        or_terms.append('persistent_uri:{}'.format(
            utilities.escape_solr_arg(equivalent_id)))
        if ':' not in equivalent_id:
            # Without a ':' the identifier may be a UUID or a slug.
            or_terms.append('uuid:{}'.format(equivalent_id))
            # For a slug, query the document slug_type_uri_label.
            or_terms.append('slug_type_uri_label:{}'.format(
                utilities.fq_slug_value_format(equivalent_id)))

    # Case-insensitive patterns for the ID prefixes to strip when
    # making URIs out of naked identifiers. The substitution is not
    # anchored, so it removes every occurrence of the prefix.
    prefix_patterns = [
        re.compile(re.escape(prefix), re.IGNORECASE)
        for prefix in (
            'doi:', 'orcid:', 'http://dx.doi.org/', 'https://dx.doi.org/',
            'http://doi.org/', 'https://doi.org/',
        )
    ]
    for id_value in id_values:
        if not id_value:
            continue
        # Each prefix is stripped from the original value on its own.
        for prefix_pattern in prefix_patterns:
            bare_id = prefix_pattern.sub('', id_value)
            if bare_id.startswith(('http://', 'https://')):
                continue

            # Only use the N2T URI templates for ARK identifiers.
            if bare_id.startswith('ark:'):
                uri_templates = configs.N2T_URI_TEMPLATES
            else:
                uri_templates = configs.PERSISTENT_URI_TEMPLATES
            for uri_template in uri_templates:
                uri_term = 'persistent_uri:{}'.format(
                    utilities.escape_solr_arg(
                        uri_template.format(id=bare_id)))
                if uri_term not in or_terms:
                    or_terms.append(uri_term)

        # Now see if there's a UUID in the identifier.
        oc_check = URImanagement.get_uuid_from_oc_uri(id_value, True)
        if oc_check:
            # The value can be interpreted as an Open Context URI, so
            # query for its uuid part.
            uuid_term = 'uuid:{}'.format(oc_check['uuid'])
            if uuid_term not in or_terms:
                or_terms.append(uuid_term)

    # The different identifier queries are alternatives, so OR them.
    joined_term = utilities.join_solr_query_terms(or_terms, operator='OR')
    return {'fq': [joined_term]}