Example #1
0
    def generate_field_counts(self, response, views, whoosh_query):
        """
        Handles all the count by field value views for a query. All values of a
        multiple-valued field are counted.

        response: Dict that is filled in place; nothing is returned. For every
            view id it receives {'counts': [(value, count), ...]}, ordered by
            descending count.
        views: Dict mapping a view id to a view description. Each description
            must have a 'field' entry naming the field to count.
        whoosh_query: The Whoosh query whose hits are counted.

        A hit adds at most one to the count of any given value, even when the
        value is repeated inside that hit's field.
        """

        logger.debug("%s generating field counts for fields: %s", self.tracking_code, ' '.join(v['field'] for v in views.values()))

        # Resolve each view's field name once instead of once per hit: the alias
        # lookup only depends on the field name (assumed deterministic). When the
        # lookup returns nothing truthy the name is used as given.
        view_fields = {}
        for view_id, view in views.items():
            field = view['field']
            view_fields[view_id] = domain_config.field_name_aliases(field) or field
            response[view_id] = {'counts': {}}

        logger.debug("%s whoosh_query: %r", self.tracking_code, whoosh_query)
        logger.debug("%s view: %s", self.tracking_code, json.dumps(views))

        with self.whoosh_index.searcher() as searcher:
            # limit=None asks for every hit, not just the top-ranked ones.
            hits = searcher.search(whoosh_query, limit=None)
            logger.info("%s whoosh search results: %r", self.tracking_code, hits)
            for hit in hits:
                for view_id, field in view_fields.items():
                    if field in hit:
                        counts = response[view_id]['counts']
                        # The set drops values repeated within a single hit.
                        for value in set(whooshutils.split_keywords(hit[field])):
                            counts[value] = counts.get(value, 0) + 1

        # Replace each tally dict with a list of (value, count) pairs, most
        # frequent first. sorted() is stable, so ties keep the dict's order.
        for view_id in views:
            tallies = response[view_id]['counts']
            response[view_id]['counts'] = sorted(tallies.items(), key=lambda item: item[1], reverse=True)
Example #2
0
    def constraint_to_whoosh_query(self, cnstr):
        """
        Translates one constraint into the equivalent Whoosh query object
        (the python object form, not the textual query syntax).
        cnstr: The constraint as JSON (as python objects). Its 'type' entry
            selects which kind of query is built.
        Raises ValueError when the constraint type is not recognised.
        """

        kind = cnstr['type']

        if kind == 'fieldvalue':
            # Use the configured alias when there is one, else the given name.
            name = cnstr['field']
            name = domain_config.field_name_aliases(name) or name
            return whoosh.query.Term(
                name, whooshutils.escape_keyword(cnstr['value']))

        if kind == 'textsearch':
            return self.query_parser.parse(cnstr['value'])

        if kind == "timerange":
            lower = cnstr['low']
            upper = cnstr['high']
            return whoosh.query.NumericRange('year', lower, upper)

        if kind == 'referencepoints':
            # Match documents carrying any one of the given reference points.
            point_terms = [
                whoosh.query.Term('referencePoints',
                                  whooshutils.escape_keyword(point))
                for point in cnstr['points']
            ]
            return whoosh.query.Or(point_terms)

        if kind == 'tsneCoordinates':
            # Points are matched against the 'id' field as integers.
            id_terms = [
                whoosh.query.Term('id', int(point))
                for point in cnstr['points']
            ]
            return whoosh.query.Or(id_terms)

        raise ValueError("unknown constraint type \"%s\"" % (kind))
Example #3
0
    def constraint_to_whoosh_query(self, cnstr):
        """
        Builds the Whoosh query (python object format, not the text format)
        that corresponds to a single constraint.
        cnstr: The constraint as JSON (as python objects). The value under
            'type' decides which query is produced.
        Raises ValueError if the constraint type is unknown.
        """

        constraint_type = cnstr['type']

        if constraint_type == 'fieldvalue':
            # An alias from the domain config wins over the requested name.
            requested = cnstr['field']
            resolved = domain_config.field_name_aliases(requested) or requested
            query = whoosh.query.Term(resolved, whooshutils.escape_keyword(cnstr['value']))
        elif constraint_type == 'textsearch':
            query = self.query_parser.parse(cnstr['value'])
        elif constraint_type == "timerange":
            low = cnstr['low']
            high = cnstr['high']
            query = whoosh.query.NumericRange('year', low, high)
        elif constraint_type == 'referencepoints':
            # Any one of the listed reference points is enough to match.
            alternatives = [whoosh.query.Term('referencePoints', whooshutils.escape_keyword(pt)) for pt in cnstr['points']]
            query = whoosh.query.Or(alternatives)
        elif constraint_type == 'tsneCoordinates':
            # Points are looked up in the 'id' field as integers.
            alternatives = [whoosh.query.Term('id', int(pt)) for pt in cnstr['points']]
            query = whoosh.query.Or(alternatives)
        else:
            raise ValueError("unknown constraint type \"%s\"" % (constraint_type))

        return query
Example #4
0
    def generate_field_counts(self, response, views, whoosh_query):
        """
        Handles all the count by field value views for a query. All values of a
        multiple-valued field are counted.

        response: Dict that is filled in place; nothing is returned. For every
            view id it receives {'counts': [(value, count), ...]}, ordered by
            descending count.
        views: Dict mapping a view id to a view description. Each description
            must have a 'field' entry naming the field to count.
        whoosh_query: The Whoosh query whose hits are counted.

        A hit adds at most one to the count of any given value, even when the
        value is repeated inside that hit's field.
        """

        logger.debug("%s generating field counts for fields: %s",
                     self.tracking_code,
                     ' '.join(v['field'] for v in views.values()))

        # Resolve each view's field name once instead of once per hit: the
        # alias lookup only depends on the field name (assumed deterministic).
        # When the lookup returns nothing truthy the name is used as given.
        view_fields = {}
        for view_id, view in views.items():
            field = view['field']
            view_fields[view_id] = (
                domain_config.field_name_aliases(field) or field)
            response[view_id] = {'counts': {}}

        logger.debug("%s whoosh_query: %r", self.tracking_code, whoosh_query)
        logger.debug("%s view: %s", self.tracking_code, json.dumps(views))

        with self.whoosh_index.searcher() as searcher:
            # limit=None asks for every hit, not just the top-ranked ones.
            hits = searcher.search(whoosh_query, limit=None)
            logger.info("%s whoosh search results: %r",
                        self.tracking_code, hits)
            for hit in hits:
                for view_id, field in view_fields.items():
                    if field in hit:
                        counts = response[view_id]['counts']
                        # The set drops values repeated within a single hit.
                        values = set(whooshutils.split_keywords(hit[field]))
                        for value in values:
                            counts[value] = counts.get(value, 0) + 1

        # Replace each tally dict with a list of (value, count) pairs, most
        # frequent first. sorted() is stable, so ties keep the dict's order.
        for view_id in views:
            tallies = response[view_id]['counts']
            response[view_id]['counts'] = sorted(
                tallies.items(), key=lambda item: item[1], reverse=True)