Esempio n. 1
0
def AdvancedSearchForm(*args, **kwargs):
    """Build a search form class from live facet data and return an instance.

    Posts a facet-only query to ``search_url``, turns the returned
    ``formalOrganisation`` and ``basedNear`` facet terms into choice lists
    (labelled through ``get_labels``), and instantiates a dynamically
    created ``forms.Form`` subclass that offers those choices.

    Args:
        *args: Passed through to the generated form's constructor.
        **kwargs: Must contain ``search_url`` (the URL the facet query is
            posted to) and ``store`` (an object whose ``query_endpoint`` is
            handed to ``get_labels``). Both are removed before the remaining
            keyword arguments are passed to the form's constructor.

    Returns:
        An instance of the generated ``AdvancedSearchForm`` form class.

    Raises:
        KeyError: If ``search_url`` or ``store`` is missing from ``kwargs``,
            or if the response lacks either of the expected facets.
    """
    search_url, store = kwargs.pop('search_url'), kwargs.pop('store')

    # 'size': 0 presumably asks the search service for facet counts only,
    # with no hits -- TODO confirm against the search backend in use.
    q = {'size': 0,
         'facets': {'formalOrganisation': {'terms': {'field': 'formalOrganisation.uri'}},
                    'basedNear': {'terms': {'field': 'basedNear.uri'}}}}

    # Passing a body makes urllib2 issue a POST. Close the response
    # explicitly rather than leaving the socket to the garbage collector.
    response = urllib2.urlopen(search_url, json.dumps(q))
    try:
        results = json.load(response)
    finally:
        response.close()

    formal_organisation_uris = [t['term'] for t in results['facets']['formalOrganisation']['terms']]
    based_near_uris = [t['term'] for t in results['facets']['basedNear']['terms']]

    # One label lookup covers both facets.
    labels = get_labels(map(rdflib.URIRef, formal_organisation_uris + based_near_uris),
                        endpoint=store.query_endpoint)

    def as_choices(uris):
        # A blank first entry, then (uri, label) pairs; the URI doubles as
        # the label when no label was found for it.
        return [('', '-'*20)] + [(uri, labels.get(rdflib.URIRef(uri), uri)) for uri in uris]

    formal_organisation_choices = as_choices(formal_organisation_uris)
    based_near_choices = as_choices(based_near_uris)

    # The field names contain dots, so they cannot be written as class
    # attributes; the form class is assembled with type() instead.
    attrs = {'q': forms.CharField(label='Query'),
             'filter.basedNear.uri': forms.ChoiceField(label='Location',
                                                       choices=based_near_choices,
                                                       initial='',
                                                       required=False),
             'filter.formalOrganisation.uri': forms.ChoiceField(label='Institution',
                                                                choices=formal_organisation_choices,
                                                                initial='http://oxpoints.oucs.ox.ac.uk/id/00000000',
                                                                required=False)}
    form = type('AdvancedSearchForm', (forms.Form,), attrs)

    return form(*args, **kwargs)
Esempio n. 2
0
    def get_results(self, parameters, cleaned_data):
        """Run the search and decorate the response for display.

        Obtains a query from ``self.get_query``, strips an empty filter from
        it, sends it through ``self.search_endpoint`` and then adds pagination
        data, the query string, per-facet metadata, term values/labels and a
        ``_url`` for every hit to the response.

        Args:
            parameters: Mapping of request parameters; ``filter.FIELDNAME``
                entries are read to record which facets are filtered on.
            cleaned_data: Mapping providing ``q`` and, optionally, ``page``
                and ``page_size``.

        Returns:
            The response from ``self.search_endpoint.query``, updated in place.
        """
        page_number = cleaned_data.get('page') or 1
        per_page = cleaned_data.get('page_size') or self.page_size
        offset = (page_number - 1) * per_page

        query = self.get_query(parameters, cleaned_data, offset, per_page)

        # An empty conjunction is dropped, and with it the whole filter part
        # of the query if nothing else remains in it.
        if 'filter' in query:
            filter_clause = query['filter']
            if 'and' in filter_clause and not filter_clause['and']:
                del filter_clause['and']
            if not filter_clause:
                del query['filter']

        results = self.search_endpoint.query(query)

        results.update(
            self.get_pagination(per_page, page_number, offset, results))
        results['q'] = cleaned_data['q']

        # First pass: attach the facet definition and filter state, give each
        # term a display value, and collect the URIs that need labels.
        uris_to_label = set()
        for key in query['facets']:
            definition = query['facets'][key]
            facet = results['facets'][key]
            facet['meta'] = definition
            field_name = definition['terms']['field']
            selected = parameters.get('filter.%s' % field_name)
            facet['filter'] = {
                'present': selected is not None,
                'value': selected,
            }
            holds_uris = field_name.endswith('.uri')
            for term in facet['terms']:
                raw = term['term']
                if holds_uris:
                    uris_to_label.add(raw)
                    term['value'] = contract(raw)
                else:
                    term['value'] = raw

        # Second pass: one label lookup for all URI terms, then attach the
        # labels that were found.
        labels = get_labels(map(rdflib.URIRef, uris_to_label),
                            endpoint=self.endpoint)
        for key in query['facets']:
            facet = results['facets'][key]
            if not facet['meta']['terms']['field'].endswith('.uri'):
                continue
            for term in facet['terms']:
                uri = rdflib.URIRef(term['term'])
                if uri in labels:
                    term['label'] = unicode(labels[uri])

        # A KeyError from the lookup below propagates to the caller.
        for hit in results['hits']['hits']:
            hit['_url'] = doc_forwards(hit['_source']['uri'])[None]

        return results
Esempio n. 3
0
 def with_labels(self, triples):
     """Yield each triple as it arrives, then label triples for unlabelled URIs.

     Every ``(s, p, o)`` from ``triples`` is yielded first. URI subjects and
     objects seen along the way are collected, and subjects that already
     carry a predicate from ``label_predicates`` are remembered. Once the
     input is exhausted, whatever ``get_labels`` returns (with
     ``mapping=False``) for the URIs lacking a label is yielded as well.
     """
     seen_uris = set()
     labelled = set()
     for s, p, o in triples:
         yield s, p, o
         if p in label_predicates:
             labelled.add(s)
         # Subject first, then object, for every node that is a URI.
         seen_uris.update(
             node for node in (s, o) if isinstance(node, rdflib.URIRef))
     unlabelled = seen_uris - labelled
     for label_triple in get_labels(unlabelled, self.endpoint, mapping=False):
         yield label_triple
Esempio n. 4
0
    def get_results(self, parameters, cleaned_data):
        """Execute the search query and annotate its response.

        The query comes from ``self.get_query``; an empty filter is removed
        before it is passed to ``self.search_endpoint``. The response is then
        extended with pagination data, the original query string, facet
        metadata and filter state, term values and labels, and a ``_url`` on
        each hit.

        Args:
            parameters: Mapping of request parameters, consulted for
                ``filter.FIELDNAME`` entries.
            cleaned_data: Mapping providing ``q`` and, optionally, ``page``
                and ``page_size``.

        Returns:
            The annotated response mapping.
        """
        current_page = cleaned_data.get('page') or 1
        size = cleaned_data.get('page_size') or self.page_size
        first = (current_page - 1) * size

        query = self.get_query(parameters, cleaned_data, first, size)

        # With no filters defined, the query must not carry a filter part.
        if 'filter' in query:
            if 'and' in query['filter'] and not query['filter']['and']:
                query['filter'].pop('and')
            if not query['filter']:
                query.pop('filter')

        results = self.search_endpoint.query(query)

        pagination = self.get_pagination(size, current_page, first, results)
        results.update(pagination)
        results['q'] = cleaned_data['q']

        facet_labels = set()
        uri_facets = []  # result facets whose terms are URIs, in query order
        for key, spec in query['facets'].items():
            facet_result = results['facets'][key]
            facet_result['meta'] = spec
            field = spec['terms']['field']
            chosen = parameters.get('filter.%s' % field)
            facet_result['filter'] = {'present': chosen is not None,
                                      'value': chosen}
            if not field.endswith('.uri'):
                # Plain terms are shown as they are.
                for term in facet_result['terms']:
                    term['value'] = term['term']
                continue
            # URI terms get a contracted value and are queued for labelling.
            uri_facets.append(facet_result)
            for term in facet_result['terms']:
                facet_labels.add(term['term'])
                term['value'] = contract(term['term'])

        labels = get_labels(map(rdflib.URIRef, facet_labels), endpoint=self.endpoint)
        for facet_result in uri_facets:
            for term in facet_result['terms']:
                uri = rdflib.URIRef(term['term'])
                if uri in labels:
                    term['label'] = unicode(labels[uri])

        # Any KeyError raised while resolving a hit's URL is left to propagate.
        for hit in results['hits']['hits']:
            source_uri = hit['_source']['uri']
            hit['_url'] = doc_forwards(source_uri)[None]

        return results
Esempio n. 5
0
    def get_results(self, parameters, cleaned_data):
        """Query the search service and return its response, decorated.

        Builds a query from ``cleaned_data['q']`` and any ``filter.FIELDNAME``
        entries in ``parameters``, posts it as JSON to ``self.search_url``,
        and then adds pagination data, the query string, facet metadata and
        filter state, term values/labels and a ``_url`` per hit.

        Args:
            parameters: Mutable mapping of request parameters. Entries named
                ``filter.FIELDNAME`` with an empty value are deleted from it.
                A value of ``'-'`` filters on the field being missing; any
                other value is matched as a term. Values for ``.uri`` fields
                that contain ``':'`` are passed through ``expand`` first.
            cleaned_data: Mapping providing ``q`` and, optionally, ``page``
                and ``page_size``.

        Returns:
            The ``self.Deunderscorer``-wrapped response, updated in place.

        Raises:
            KeyError: If ``cleaned_data`` has no ``q``, or if the response
                lacks an expected key (for example a hit without a ``uri``).
        """
        page = cleaned_data.get('page') or 1
        page_size = cleaned_data.get('page_size') or self.page_size
        start = (page - 1) * page_size

        query = {
            'query': {'query_string': {'query': cleaned_data['q'],
                                       'default_operator': 'AND'}},
            'from': start,
            'size': page_size,
        }

        # Parse query parameters of the form 'filter.FIELDNAME' into
        # (field name, filter clause) pairs. Keeping the field name next to
        # the clause lets the facet code below tell which facet a clause
        # acts on, whatever the clause's shape.
        active_filters = []
        for key in list(parameters):
            if not key.startswith('filter.'):
                continue
            field, parameter = key[7:], parameters[key]
            if not parameter:
                del parameters[key]
                continue
            if parameter == '-':
                clause = {'missing': {'field': field}}
            else:
                if key.endswith('.uri') and ':' in parameter:
                    parameter = expand(parameter)
                clause = {'term': {field: parameter}}
            active_filters.append((field, clause))

        # The query only gets a filter part when at least one filter is set.
        if active_filters:
            query['filter'] = {
                'and': [clause for _field, clause in active_filters]}

        if self.facets:
            # Copy the facet definitions as they are modified below.
            facets = copy.deepcopy(self.facets)

            # Add facet filters for all active filters except any acting on
            # this particular facet. Comparing field names directly also
            # excludes 'missing' clauses, whose field name is a value rather
            # than a key of the clause.
            for facet in facets.values():
                for field, clause in active_filters:
                    if facet['terms']['field'] != field:
                        if 'facet_filter' not in facet:
                            facet['facet_filter'] = {'and': []}
                        facet['facet_filter']['and'].append(clause)
            query['facets'] = facets

        # Close the response explicitly rather than leaking the socket.
        response = urllib2.urlopen(self.search_url, json.dumps(query))
        try:
            results = self.Deunderscorer(json.load(response))
        finally:
            response.close()

        results.update(self.get_pagination(page_size, page, start, results))
        results['q'] = cleaned_data['q']

        # No facets are requested when self.facets is empty, so fall back to
        # an empty mapping instead of failing on a missing key.
        requested_facets = query.get('facets', {})

        facet_labels = set()
        for key in requested_facets:
            meta = results['facets'][key]['meta'] = requested_facets[key]
            filter_value = parameters.get('filter.%s' % meta['terms']['field'])
            results['facets'][key]['filter'] = {'present': filter_value is not None,
                                                'value': filter_value}
            if meta['terms']['field'].endswith('.uri'):
                for term in results['facets'][key]['terms']:
                    facet_labels.add(term['term'])
                    term['value'] = contract(term['term'])
            else:
                for term in results['facets'][key]['terms']:
                    term['value'] = term['term']

        # Labels are only needed when a URI facet produced terms; with none,
        # the lookup is skipped because there would be nothing to label.
        if facet_labels:
            labels = get_labels(facet_labels, endpoint=self.endpoint)
            for key in requested_facets:
                if results['facets'][key]['meta']['terms']['field'].endswith('.uri'):
                    for term in results['facets'][key]['terms']:
                        uri = URIRef(term['term'])
                        if uri in labels:
                            term['label'] = unicode(labels[uri])

        # A KeyError from the lookup below propagates to the caller.
        for hit in results['hits']['hits']:
            hit['_url'] = doc_forwards(hit['_source']['uri'])[None]

        return results