def generate_field_counts(self, response, views, whoosh_query):
    """
    Handles all the count by field value views for a query.

    Every hit of whoosh_query is visited once. For each view, the values
    found in that view's field of the hit are tallied. All values of a
    multiple-valued field are counted; a value that occurs more than once
    in the same hit is counted only once for that hit.

    response: Dict that is filled in place. For every view id it receives
        {'counts': [(value, count), ...]}, ordered by descending count.
    views: Dict mapping a view id to a view description. Each description
        must have a 'field' entry naming the field to count.
    whoosh_query: The Whoosh query selecting the hits to count over.

    Returns nothing; the result is written into response.
    """
    field_names = ' '.join(v['field'] for v in views.values())
    logger.debug(self.tracking_code + " generating field counts for fields: %s" % field_names)

    # Resolve each view's field alias once up front: the lookup depends only
    # on the view, so there is no need to repeat it for every hit.
    resolved_fields = {}
    for view_id, view in views.items():
        response[view_id] = {'counts': {}}
        field = view['field']
        resolved_fields[view_id] = domain_config.field_name_aliases(field) or field

    logger.debug(self.tracking_code + " whoosh_query: " + repr(whoosh_query))
    logger.debug(self.tracking_code + " view: " + json.dumps(views))

    with self.whoosh_index.searcher() as searcher:
        hits = searcher.search(whoosh_query, limit=None)
        logger.info(self.tracking_code + " whoosh search results: %s" % (repr(hits)))
        for hit in hits:
            for view_id, field in resolved_fields.items():
                if field not in hit:
                    continue
                counts = response[view_id]['counts']
                # set() collapses repeated values so each hit contributes
                # at most one count per distinct value.
                for value in set(whooshutils.split_keywords(hit[field])):
                    counts[value] = counts.get(value, 0) + 1

    # Replace each tally dict with a list of (value, count) pairs, highest
    # count first. sorted() with an index-based key works on both Python 2
    # and Python 3, unlike list.sort() on dict.items() with a
    # tuple-unpacking lambda.
    for view_id in views:
        tally = response[view_id]['counts']
        response[view_id]['counts'] = sorted(
            tally.items(), key=lambda item: item[1], reverse=True)
def constraint_to_whoosh_query(self, cnstr):
    """
    Translates a single constraint into a Whoosh query object.

    The query is built from Whoosh's python query objects (or by the
    instance's query parser), not from the textual query format.

    cnstr: The constraint as JSON (as python objects). Its 'type' entry
        selects the translation:
          'fieldvalue'      - Term on cnstr['field'] (replaced by its alias
                              when domain_config supplies one) matching the
                              escaped cnstr['value']
          'textsearch'      - cnstr['value'] parsed by self.query_parser
          'timerange'       - NumericRange on 'year' between cnstr['low']
                              and cnstr['high']
          'referencepoints' - Or over Terms on 'referencePoints', one per
                              escaped entry of cnstr['points']
          'tsneCoordinates' - Or over Terms on 'id', one per entry of
                              cnstr['points'] converted with int()

    Raises ValueError for any other constraint type.
    """
    kind = cnstr['type']

    if kind == 'fieldvalue':
        requested = cnstr['field']
        # Fall back to the requested name when no alias is configured.
        resolved = domain_config.field_name_aliases(requested) or requested
        keyword = whooshutils.escape_keyword(cnstr['value'])
        return whoosh.query.Term(resolved, keyword)

    if kind == 'textsearch':
        return self.query_parser.parse(cnstr['value'])

    if kind == "timerange":
        return whoosh.query.NumericRange('year', cnstr['low'], cnstr['high'])

    if kind == 'referencepoints':
        terms = []
        for point in cnstr['points']:
            keyword = whooshutils.escape_keyword(point)
            terms.append(whoosh.query.Term('referencePoints', keyword))
        return whoosh.query.Or(terms)

    if kind == 'tsneCoordinates':
        terms = []
        for point in cnstr['points']:
            terms.append(whoosh.query.Term('id', int(point)))
        return whoosh.query.Or(terms)

    raise ValueError("unknown constraint type \"%s\"" % (kind))
def constraint_to_whoosh_query(self, cnstr):
    """
    Translates a single constraint into a Whoosh query object.

    The query is built from Whoosh's python query objects (or by the
    instance's query parser), not from the textual query format.

    cnstr: The constraint as JSON (as python objects). Its 'type' entry
        selects the translation:
          'fieldvalue'      - Term on cnstr['field'] (replaced by its alias
                              when domain_config supplies one) matching the
                              escaped cnstr['value']
          'textsearch'      - cnstr['value'] parsed by self.query_parser
          'timerange'       - NumericRange on 'year' between cnstr['low']
                              and cnstr['high']
          'referencepoints' - Or over Terms on 'referencePoints', one per
                              escaped entry of cnstr['points']
          'tsneCoordinates' - Or over Terms on 'id', one per entry of
                              cnstr['points'] converted with int()

    Raises ValueError for any other constraint type.
    """
    kind = cnstr['type']

    if kind == 'fieldvalue':
        requested = cnstr['field']
        # Fall back to the requested name when no alias is configured.
        resolved = domain_config.field_name_aliases(requested) or requested
        keyword = whooshutils.escape_keyword(cnstr['value'])
        return whoosh.query.Term(resolved, keyword)

    if kind == 'textsearch':
        return self.query_parser.parse(cnstr['value'])

    if kind == "timerange":
        return whoosh.query.NumericRange('year', cnstr['low'], cnstr['high'])

    if kind == 'referencepoints':
        terms = []
        for point in cnstr['points']:
            keyword = whooshutils.escape_keyword(point)
            terms.append(whoosh.query.Term('referencePoints', keyword))
        return whoosh.query.Or(terms)

    if kind == 'tsneCoordinates':
        terms = []
        for point in cnstr['points']:
            terms.append(whoosh.query.Term('id', int(point)))
        return whoosh.query.Or(terms)

    raise ValueError("unknown constraint type \"%s\"" % (kind))
def generate_field_counts(self, response, views, whoosh_query):
    """
    Handles all the count by field value views for a query.

    Every hit of whoosh_query is visited once. For each view, the values
    found in that view's field of the hit are tallied. All values of a
    multiple-valued field are counted; a value that occurs more than once
    in the same hit is counted only once for that hit.

    response: Dict that is filled in place. For every view id it receives
        {'counts': [(value, count), ...]}, ordered by descending count.
    views: Dict mapping a view id to a view description. Each description
        must have a 'field' entry naming the field to count.
    whoosh_query: The Whoosh query selecting the hits to count over.

    Returns nothing; the result is written into response.
    """
    field_names = ' '.join(v['field'] for v in views.values())
    logger.debug(self.tracking_code + " generating field counts for fields: %s" % field_names)

    # Resolve each view's field alias once up front: the lookup depends only
    # on the view, so there is no need to repeat it for every hit.
    resolved_fields = {}
    for view_id, view in views.items():
        response[view_id] = {'counts': {}}
        field = view['field']
        resolved_fields[view_id] = domain_config.field_name_aliases(field) or field

    logger.debug(self.tracking_code + " whoosh_query: " + repr(whoosh_query))
    logger.debug(self.tracking_code + " view: " + json.dumps(views))

    with self.whoosh_index.searcher() as searcher:
        hits = searcher.search(whoosh_query, limit=None)
        logger.info(self.tracking_code + " whoosh search results: %s" % (repr(hits)))
        for hit in hits:
            for view_id, field in resolved_fields.items():
                if field not in hit:
                    continue
                counts = response[view_id]['counts']
                # set() collapses repeated values so each hit contributes
                # at most one count per distinct value.
                for value in set(whooshutils.split_keywords(hit[field])):
                    counts[value] = counts.get(value, 0) + 1

    # Replace each tally dict with a list of (value, count) pairs, highest
    # count first. sorted() with an index-based key works on both Python 2
    # and Python 3, unlike list.sort() on dict.items() with a
    # tuple-unpacking lambda.
    for view_id in views:
        tally = response[view_id]['counts']
        response[view_id]['counts'] = sorted(
            tally.items(), key=lambda item: item[1], reverse=True)