def testquery():
    """Return the query expressed in the elasticsearch api.

    Debug endpoint: builds the elasticsearch query body for the current
    request and returns it as text (query json + settings json) instead
    of executing the search.
    """
    auth, permitted = validate_user(mode="read")
    try:
        from translator import parser
        query = request.query_string
        # default
        settings = parser.make_settings(permitted, {'size': 25, 'page': 0})
        # first parse: fills in `settings` (mode, sort, start/page) as a
        # side effect; its return value is overwritten further down
        elasticq = parser.parse(query, settings)
        mode = settings['mode']
        if not settings.get('sort', ''):
            # default: group by lexicon, then sort by score
            sort = configM.searchfield(mode, 'sort_by')
        else:
            sort = settings['sort']
        # an explicit 'start' takes precedence over page-based offset
        start = settings['start'] if 'start' in settings \
            else settings['page'] * settings['size']
        query = unquote(query)
        # NOTE(review): second parse of the *unquoted* query, without
        # settings, replaces the first result -- confirm the double parse
        # is intentional and the first call is needed only for settings
        elasticq = parser.parse(query)
        # string-concatenate the query body with the display settings
        return elasticq + dumps({'sort': sort,
                                 '_from': start,
                                 'size': settings['size'],
                                 'version': 'true'})
    except Exception as e:
        # catch *all* exceptions
        # TODO only catch relevant exceptions
        logging.exception(e)
        raise eh.KarpGeneralError(e, query)
def checksuggestion(lexicon, _id):
    """Return the suggestion with id `_id` from lexicon's suggestion db."""
    from dbhandler.dbhandler import dbselect
    # TODO add exception handling
    try:
        hits = dbselect(lexicon, suggestion=True, _id=_id, max_hits=1)
        return jsonify({'updates': hits})
    except Exception as e:
        raise eh.KarpGeneralError(str(e))
def acceptsuggestion(lexicon, _id):
    """Accept the suggestion `_id` and save it to the lexicon."""
    try:
        result = savesuggestion(lexicon, _id)
        return jsonify(result)
    except (esExceptions.RequestError, esExceptions.TransportError) as e:
        # elasticsearch failed: log the failed update and report it
        update.handle_update_error(e, {"id": _id}, helpers.get_user(), 'accept')
        raise eh.KarpElasticSearchError("Error during update. Document not saved.",
                                        debug_msg=str(e))
    except Exception as e:
        update.handle_update_error(e, {"id": _id}, helpers.get_user(), 'accept')
        raise eh.KarpGeneralError(str(e))
def checkuserhistory():
    """ Shows the updates a user has made """
    try:
        auth, permitted = validate_user()
        user = helpers.get_user()
    except AttributeError:
        raise eh.KarpGeneralError('No user name provided', 'checkuserhistory')
    try:
        size = helpers.get_size(default=10, settings={'allowed': permitted})
        from src.dbhandler.dbhandler import dbselect
        updates = []
        # only lexicons that are kept in sql have any history to report
        sql_lexicons = (lex for lex in permitted if configM.get_lexicon_sql(lex))
        for lex in sql_lexicons:
            updates += dbselect(lex, user=user, max_hits=size)
        return jsonify({'updates': updates})
    except Exception as e:
        logging.exception(e)
        raise eh.KarpGeneralError(str(e))
def lookup_multiple_spec(field, mode=standardmode):
    """Resolve `field` in `mode` to its search specification.

    Returns a pair (fields, spec): when the configured value is a dict,
    fields is [value["search"]] and spec is the (path, typefield, type)
    triple; otherwise fields is the raw value and spec is ''.
    Raises eh.KarpGeneralError if the field cannot be resolved.
    """
    try:
        val = get_value(field, mode)
        # isinstance instead of `type(val) is dict` (idiomatic, and also
        # accepts dict subclasses)
        if isinstance(val, dict):
            return ([val["search"]],
                    (val["path"], val["typefield"], val["type"]))
        return (val, '')
    except Exception as e:
        msg = "Field %s not found in mode %s" % (field, mode)
        logging.error(msg + ": ")
        logging.exception(e)
        raise eh.KarpGeneralError(msg)
def query(page=0):
    """Execute the request's search query and return the json answer."""
    try:
        return jsonify(requestquery(page=page))
    except eh.KarpException as e:
        # karp exceptions are passed through untouched
        logging.exception(e)
        raise
    except Exception as e:
        # anything else is shown to the user as a general error
        logging.exception(e)
        raise eh.KarpGeneralError(str(e), user_msg=str(e),
                                  query=request.query_string)
def statlist():
    # TODO add is_more here (as above)
    """ Returns the counts and stats for the query, as flat tables """
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        logging.debug('mode is %s' % mode)
        default = {"buckets": configM.searchfield(mode, 'statistics_buckets'),
                   "size": 100,
                   "cardinality": False}
        settings = parser.make_settings(permitted, default)
        # unauthenticated users never see secret fields in the statistics
        exclude = [] if auth else configM.searchfield(mode, 'secret_fields')
        elasticq, more = parser.statistics(query, settings, exclude=exclude,
                                           prefix='STAT_')
        es = configM.elastic(mode=settings['mode'])
        # NOTE(review): statistics() passes the whole `settings` dict to
        # check_bucketsize, but here only settings["size"] is passed --
        # confirm which signature is correct
        is_more = check_bucketsize(more, settings["size"], mode, es)
        # TODO allow more than 100 000 hits here?
        size = settings['size']
        index, typ = configM.get_mode_index(settings['mode'])
        ans = es.search(index=index, body=loads(elasticq),
                        search_type="count", size=size)
        tables = []
        # flatten the STAT_-prefixed aggregations into tables
        for key, val in ans['aggregations']['q_statistics'].items():
            if key.startswith('STAT_'):
                tables.extend(generate_table(val, []))
        # the length of tables might be longer than size, so truncate it
        # generating shorter tables is not faster than generating all of it
        # and then truncating
        if size:
            tables = tables[:size]
        return jsonify({"stat_table": tables, "is_more": is_more})
    except eh.KarpException as e:
        # pass on karp exceptions
        logging.exception(e)
        raise
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except Exception as e:
        # catch *all* exceptions
        # raise
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
def minientry():
    """Run the query and return a reduced version of each matching entry.

    Only the configured minientry fields are shown, and secret fields
    are stripped for unauthenticated users.
    """
    max_page = configM.setupconfig['MINIENTRY_PAGE']
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        defaults = {'show': configM.searchfield(mode, 'minientry_fields'),
                    'size': 25}
        settings = parser.make_settings(permitted, defaults)
        elasticq = parser.parse(query, settings)
        fields = settings['show']
        if not auth:
            # unauthenticated: hide the mode's secret fields
            secret = configM.searchfield(mode, 'secret_fields')
            fields = list(set(fields).difference(secret))
        sort = sortorder(settings, mode, settings.get('query_command', ''))
        start = settings.get('start', 0)
        es = configM.elastic(mode=settings['mode'])
        index, typ = configM.get_mode_index(settings['mode'])
        es_args = {'index': index,
                   '_source': fields,
                   'from_': start,
                   'sort': sort,
                   'size': min(settings['size'], max_page),
                   'search_type': 'dfs_query_then_fetch'}
        ans = parser.adapt_query(settings['size'], start, es,
                                 loads(elasticq), es_args)
        if settings.get('highlight', False):
            clean_highlight(ans)
        return jsonify(ans)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        raise eh.KarpAuthenticationError(e.message)
    except PErr.QueryError as e:
        raise eh.KarpQueryError("Parse error, %s" % e.message,
                                debug_msg=e, query=query)
    except eh.KarpException as e:
        # pass on karp exceptions
        logging.exception(e)
        raise
    except Exception as e:
        # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
def read_data():
    """Read the incoming request data and make sure it is usable.

    Returns the parsed json body.
    Raises eh.KarpParsingError when the body is unparsable or empty,
    eh.KarpGeneralError when the 'message' field is missing.
    """
    try:
        request.get_data()
        data = loads(request.data)
    except ValueError as e:
        raise eh.KarpParsingError(str(e))
    # bug fix: the emptiness check must come before the message check --
    # previously any empty document also lacked 'message' and hit the
    # less specific 'Input data not ok' error, making this branch dead
    if not data:
        errstr = "The source is empty. Empty documents not allowed"
        raise eh.KarpParsingError(errstr)
    if 'message' not in data:
        # fail if message is not there
        raise eh.KarpGeneralError('Input data not ok')
    return data
def searchconf(mode, field, failonerror=True):
    # looks up field in modes.json, eg. "autocomplete"
    # returns the karp field name (eg. baseform.raw)
    """Look up `field` for `mode` in the search configuration.

    Returns the configured value, or '' when the lookup fails and
    failonerror is False.
    Raises eh.KarpGeneralError on a failed lookup when failonerror.
    """
    try:
        logging.debug('\n%s\n' % searchconfig[mode])
        return searchconfig[mode][field]
    # narrowed from a blanket `except Exception`: the handler's logic
    # only makes sense for failed subscript lookups
    except (KeyError, TypeError) as e:
        if mode not in searchconfig:
            msg = "Mode %s not found" % mode
        else:
            msg = "Config field %s not found in mode %s" % (field, mode)
        logging.error(msg + ": ")
        logging.exception(e)
        if failonerror:
            raise eh.KarpGeneralError(msg)
        return ''
def acceptmodified(lexicon, _id):
    """Accept suggestion `_id` with the modified document from the request.

    Raises KarpElasticSearchError / KarpGeneralError on failure.
    """
    # bug fix: define `data` before the try; if loads() raises, the
    # except handlers below referenced an unbound name (NameError)
    data = None
    try:
        request.get_data()
        data = loads(request.data)
        modified_doc = data
        ans = savesuggestion(lexicon, _id, status='accepted_modified',
                             source=modified_doc)
        return jsonify(ans)
    except (esExceptions.RequestError, esExceptions.TransportError) as e:
        logging.exception(e)
        update.handle_update_error(e, {"id": _id, "data": data},
                                   helpers.get_user(), 'accept modified')
        raise eh.KarpElasticSearchError("Error during update. Document not saved.",
                                        debug_msg=str(e))
    except Exception as e:
        logging.exception(e)
        update.handle_update_error(e, {"id": _id, "data": data},
                                   helpers.get_user(), 'accept modified')
        raise eh.KarpGeneralError(str(e))
def rejectsuggestion(lexicon, _id):
    """Reject suggestion `_id`: delete it from the suggestion index,
    mark it as rejected in sql and notify the suggesting user.

    Raises KarpDbError when the suggestion is not found,
    KarpAuthenticationError when the user may not update the lexicon,
    KarpElasticSearchError / KarpGeneralError on update failures.
    """
    from dbhandler.dbhandler import dbselect
    try:
        origin = dbselect(lexicon, suggestion=True, _id=_id, max_hits=1)[0]
    except Exception as e:
        # if error occurs here, the suggestion is not in sql
        raise eh.KarpDbError('Rejection not found',
                             'Rejection not found: %s' % str(e))
    auth, permitted = validate_user()
    set_lexicon = origin["doc"]["lexiconName"]
    helpers.check_lexiconName(lexicon, set_lexicon, 'rejectsuggestion', _id)
    if lexicon not in permitted:
        raise eh.KarpAuthenticationError('You are not allowed to update lexicon %s'
                                         % lexicon)
    try:
        # (a redundant second dbselect of the same suggestion was removed
        # here; `origin` is already fetched above)
        # delete from suggestion index
        # the user log in is checked in delete_entry
        # delete_entry raises exception if ES fails
        sugg_index, typ = configM.get_lexicon_suggindex(lexicon)
        ans = update.delete_entry(lexicon, _id, sql=False, live=False,
                                  suggestion=True)
        request.get_data()
        data = loads(request.data)
        message = data.get('message')
        # mark as rejected
        ok, err = update.modify_db(_id, lexicon, message, "rejected")
        ans['sugg_db_loaded'] = ok
        if not ok:
            logging.debug(err)
        update.send_notification(origin['user'], message, _id, "rejected")
        return jsonify(ans)
    except (esExceptions.RequestError, esExceptions.TransportError) as e:
        update.handle_update_error(e, {"id": _id}, helpers.get_user(), 'reject')
        raise eh.KarpElasticSearchError("Error during update. Document not saved.",
                                        debug_msg=str(e))
    except Exception as e:
        update.handle_update_error(e, {"id": _id}, helpers.get_user(), 'reject')
        raise eh.KarpGeneralError(str(e))
def checklexiconhistory(lexicon, date):
    """ Shows the updates on one lexicon since `date`.

    Returns a json object with the lexicon name and the matching
    update rows from the history database.
    Raises KarpAuthenticationError if the user may not see the lexicon,
    KarpGeneralError on any other failure.
    """
    try:
        auth, permitted = validate_user()
        if lexicon not in permitted:
            raise eh.KarpAuthenticationError(
                'You are not allowed to update lexicon %s' % lexicon)
        settings = {"allowed": permitted}
        helpers.get_querysettings(settings)
        size = settings.get('size', 10)
        status = settings.get('status', ['added', 'changed', 'removed'])
        from src.dbhandler.dbhandler import dbselect
        return jsonify({'resource': lexicon,
                        'updates': dbselect(lexicon, status=status,
                                            from_date=date, max_hits=size)})
    except eh.KarpAuthenticationError:
        # bug fix: previously the generic handler below swallowed this
        # and rewrapped it as KarpGeneralError, hiding the auth failure
        raise
    except Exception as e:
        raise eh.KarpGeneralError(str(e))
def random():
    """Return randomly picked entries matching the query."""
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        defaults = {"show": configM.searchfield(mode, 'minientry_fields'),
                    "size": 1}
        settings = parser.make_settings(permitted, defaults)
        elasticq = parser.random(query, settings)
        es = configM.elastic(mode=mode)
        index, typ = configM.get_mode_index(mode)
        es_q = {'index': index,
                'body': loads(elasticq),
                'size': settings['size']}
        fields = settings['show']
        if fields:
            if not auth:
                # unauthenticated: hide the mode's secret fields
                secret = configM.searchfield(mode, 'secret_fields')
                fields = list(set(fields).difference(secret))
            es_q['_source'] = fields
        return jsonify(es.search(**es_q))
    except PErr.AuthenticationError as e:
        logging.exception(e)
        raise eh.KarpAuthenticationError(e.message)
    except eh.KarpException as e:
        # karp exceptions are passed on unchanged
        logging.exception(e)
        raise
    except Exception as e:
        # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
def statistics():
    """Run the query and return aggregated counts/statistics for it."""
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        defaults = {"buckets": configM.searchfield(mode, 'statistics_buckets'),
                    "size": 100,
                    "cardinality": False}
        settings = parser.make_settings(permitted, defaults)
        # unauthenticated users never see secret fields in the statistics
        exclude = configM.searchfield(mode, 'secret_fields') if not auth else []
        elasticq, more = parser.statistics(query, settings, exclude=exclude)
        es = configM.elastic(mode=settings['mode'])
        is_more = check_bucketsize(more, settings, mode, es)
        index, typ = configM.get_mode_index(settings['mode'])
        # TODO allow more than 100 000 hits here?
        ans = es.search(index=index, body=loads(elasticq),
                        search_type="count", size=settings['size'])
        ans["is_more"] = is_more
        return jsonify(ans)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        raise eh.KarpAuthenticationError(e.message)
    except eh.KarpException as e:
        # karp exceptions are passed on unchanged
        logging.exception(e)
        raise
    except Exception as e:
        # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
def autocomplete():
    """ Returns lemgrams matching the query text.
        Each mode specifies in the configs which fields that should be
        considered.
        The parameter 'q' or 'query' is used when only one word form is to be
        processed.
        The parameter 'multi' is used when multiple word forms should be
        processed.
        The format of result depends on which flag that is set.
    """
    auth, permitted = validate_user(mode="read")
    query = request.query_string
    try:
        settings = parser.make_settings(permitted, {'size': 1000})
        parsed = parser.parse_qs(query)
        mode = parser.get_mode(query)
        p_extra = parser.parse_extra(parsed, settings)
        # 'q' and 'query' are aliases for the single-word case
        qs = parsed.get('q', []) or parsed.get('query', [])
        multi = False
        if not qs:
            # check if there are multiple words forms to complete
            qs = settings.get('multi', [])
            logging.debug('qs %s' % qs)
            multi = True
        # use utf8, escape '"'
        qs = [re.sub('"', '\\"', q.decode('utf8')) for q in qs]
        # the first configured boost field is used to rank exact matches
        headboost = configM.searchfield(mode, 'boosts')[0]
        res = {}
        ans = {}
        # if multi is not true, only one iteration of this loop will be done
        for q in qs:
            # json fragment boosting exact matches on the head field
            boost = '''"functions": [{"boost_factor" : "500",
                       "filter":{"term":{"%s":"%s"}}}]''' % (headboost, q)
            # a mode may provide its own autocomplete query builder;
            # autocompletequery is the default
            autocompleteq = configM.extra_src(mode, 'autocomplete',
                                              autocompletequery)
            exp = autocompleteq(mode, boost, q)
            autocomplete_field = configM.searchonefield(mode,
                                                        'autocomplete_field')
            fields = ['"exists": {"field" : "%s"}' % autocomplete_field]
            # last argument is the 'fields' used for highlightning
            # TODO use filter?
            elasticq = parser.search([exp] + p_extra, fields, '',
                                     usefilter=True)
            es = configM.elastic(mode=mode)
            logging.debug('_source: %s' % autocomplete_field)
            logging.debug(elasticq)
            index, typ = configM.get_mode_index(mode)
            ans = parser.adapt_query(settings['size'], 0, es, loads(elasticq),
                                     {'size': settings['size'],
                                      'index': index,
                                      '_source': autocomplete_field})
            # save the results for multi
            res[q] = ans
        if multi:
            return jsonify(res)
        else:
            # single querys: only return the latest answer
            return jsonify(ans)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except eh.KarpException as e:
        # pass on karp exceptions
        logging.exception(e)
        raise
    except Exception as e:
        # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)