Example #1
def testquery():
    """ Returns the query expressed in elastics search api """
    auth, permitted = validate_user(mode="read")
    try:
        from translator import parser
        query = request.query_string
        # default
        settings = parser.make_settings(permitted, {'size': 25, 'page': 0})
        elasticq = parser.parse(query, settings)
        mode = settings['mode']
        if not settings.get('sort', ''):
            # default: group by lexicon, then sort by score
            sort = configM.searchfield(mode, 'sort_by')
        else:
            sort = settings['sort']
        start = settings['start'] if 'start' in settings\
                                  else settings['page'] * settings['size']
        query = unquote(query)
        elasticq = parser.parse(query, settings)
        return elasticq + dumps({
            'sort': sort,
            '_from': start,
            'size': settings['size'],
            'version': 'true'
        })
    except Exception as e:  # catch *all* exceptions
        # TODO only catch relevant exceptions
        logging.exception(e)
        raise eh.KarpGeneralError(e, query)
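
A minimal sketch of the string this endpoint produces, using a hypothetical parsed query; note that testquery appends the paging/sorting block as a second JSON object after the query rather than merging the two:

from json import dumps

# hypothetical output of parser.parse()
elasticq = dumps({"query": {"match": {"baseform": "test"}}})
# the block testquery appends for sorting and paging
paging = dumps({"sort": ["_score"], "_from": 0, "size": 25, "version": "true"})
print(elasticq + paging)  # two concatenated JSON objects, not one document
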
def checkhistory(lexicon, lid):
    """ Shows the update log of an entry """
    from src.dbhandler.dbhandler import dbselect
    auth, permitted = validate_user(mode="read")
    settings = {'allowed': permitted}
    size = helpers.get_size(default=10, settings=settings)
    return jsonify({'updates': dbselect(lexicon, _id=lid, max_hits=size)})
def comparejson(lexicon, _id, fromdate='', todate=''):
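    """ Compares two versions of an entry and returns their diff """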
    from src.dbhandler.dbhandler import dbselect
    import src.server.translator.jsondiff as jsondiff
    auth, permitted = validate_user()
    if lexicon not in permitted:
        raise eh.KarpAuthenticationError('You are not allowed to update '
                                         'lexicon %s' % lexicon)

    try:
        if not todate:
            import datetime
            todate = datetime.datetime.now()
            tojson = dbselect(lexicon, max_hits=1, to_date=todate, _id=_id)[0]
        else:
            tojson = dbselect(lexicon, exact_date=todate, _id=_id)[0]

    # TODO catch the error here and print it to the log.
    # It is probably not really sql that raises the exception
    except Exception:
        raise eh.KarpDbError('Could not find any entry from %s' % todate)

    try:
        if not fromdate:
            jsons = dbselect(lexicon, max_hits=2, to_date=todate, _id=_id)
            fromjson = {'doc': {}} if len(jsons) == 1 else jsons[1]
        else:
            fromjson = dbselect(lexicon, exact_date=fromdate, _id=_id)[0]

        fromjson = fromjson['doc']
        tojson = tojson['doc']
    # TODO catch the error here and print it to the log.
    # It is probably not really sql that raises the exception
    except Exception:
        raise eh.KarpDbError('Could not find any entry from %s' % fromdate)
    return jsonify({'diff': jsondiff.compare(fromjson, tojson)})
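
jsondiff.compare comes from karp's own translator package; purely as an illustration of what such a structural diff does (this is not the project's implementation), a naive version might look like this:

def naive_diff(old, new, path=''):
    """ Illustrative stand-in for jsondiff.compare: lists the paths at
        which two JSON-like dicts differ """
    diffs = []
    for key in sorted(set(old) | set(new)):
        subpath = '%s.%s' % (path, key) if path else key
        if key not in old:
            diffs.append(('added', subpath, new[key]))
        elif key not in new:
            diffs.append(('removed', subpath, old[key]))
        elif isinstance(old[key], dict) and isinstance(new[key], dict):
            diffs.extend(naive_diff(old[key], new[key], subpath))
        elif old[key] != new[key]:
            diffs.append(('changed', subpath, (old[key], new[key])))
    return diffs

print(naive_diff({'pos': 'nn', 'form': 'katt'}, {'pos': 'nn', 'form': 'katter'}))
# [('changed', 'form', ('katt', 'katter'))]
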
Example #4
def requestquery(page=0):
    """ The Function for querying our database """
    # page is assumed to be 0 indexed here
    query = request.query_string
    auth, permitted = validate_user(mode="read")
    try:
        # default values
        default = {'size': 25, 'page': page, 'version': 'true'}
        settings = parser.make_settings(permitted, default)
        elasticq = parser.parse(query, settings)
    except PErr.QueryError as e:
        logging.exception(e)
        raise eh.KarpQueryError('Parse error',
                                debug_msg=e.message,
                                query=query)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except eh.KarpException as e:  # pass on karp exceptions
        logging.exception(e)
        raise
    except Exception as e:  # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpQueryError("Could not parse data",
                                debug_msg=e,
                                query=query)
    mode = settings['mode']
    sort = sortorder(settings, mode, settings.get('query_command', ''))
    start = settings['start'] if 'start' in settings\
                              else settings['page'] * settings['size']

    # size = min(settings['size'], setupconf.max_page)
    size = settings['size']
    index, typ = configM.get_mode_index(mode)
    exclude = configM.searchfield(mode, 'secret_fields') if not auth else []
    ans = parser.adapt_query(
        size, start, configM.elastic(mode=mode), loads(elasticq), {
            'size': size,
            'sort': sort,
            'from_': start,
            'index': index,
            '_source_exclude': exclude,
            'version': settings['version'],
            'search_type': 'dfs_query_then_fetch'
        })

    if settings.get('highlight', False):
        clean_highlight(ans)

    if settings.get('format') or settings.get('export'):
        formatmethod = 'format' if 'format' in settings else 'export'
        toformat = settings.get(formatmethod)
        msg = 'Unknown %s %s for mode %s' % (formatmethod, toformat, mode)
        format_posts = configM.extra_src(mode, formatmethod,
                                         helpers.notdefined(msg))
        format_posts(ans, configM.elastic(mode=mode), mode, index, toformat)

    return ans
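
The start offset used above falls back to page arithmetic when the caller gives no explicit 'start'; the same resolution in isolation:

def resolve_start(settings):
    # an explicit 'start' wins; otherwise compute the offset from the
    # 0-indexed page number and the page size
    if 'start' in settings:
        return settings['start']
    return settings['page'] * settings['size']

assert resolve_start({'start': 40, 'page': 0, 'size': 25}) == 40
assert resolve_start({'page': 2, 'size': 25}) == 50
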
Example #5
def export(lexicon):
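    """ Exports all entries of a lexicon, optionally as of a given date """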
    # TODO can user with only read permissions export all the lexicon?
    # (eg saol)
    auth, permitted = validate_user(mode="read")
    if lexicon not in permitted:
        raise eh.KarpAuthenticationError('You are not allowed to search the '
                                         'lexicon %s' % lexicon)

    settings = parser.make_settings(permitted, {
        "size": -1,
        "resource": lexicon
    })
    query = request.query_string
    parsed = parser.parse_qs(query)
    parser.parse_extra(parsed, settings)
    date = settings.get('date', '')
    mode = settings.get('mode', '')
    if date:
        from dateutil.parser import parserinfo, parse
        from datetime import datetime
        # parse the date as inclusive (including the whole selected day)
        date = parse(date,
                     parserinfo(yearfirst=True),
                     default=datetime(1999, 1, 1, 23, 59))

    to_keep = {}
    engine, db_entry = db.get_engine(lexicon, echo=False)
    logging.debug('exporting entries from %s ' % lexicon)
    for entry in db.dbselect(lexicon,
                             engine=engine,
                             db_entry=db_entry,
                             max_hits=-1,
                             to_date=date):
        _id = entry['id']
        if _id in to_keep:
            last = to_keep[_id]['date']
            if last < entry['date']:
                to_keep[_id] = entry
        else:
            to_keep[_id] = entry

    ans = [
        val['doc'] for val in to_keep.values() if val['status'] != 'removed'
    ]
    if settings['size'] >= 0:
        # only truncate when an explicit, non-negative size was asked for
        ans = ans[:settings['size']]

    logging.debug('exporting %s entries' % len(ans))
    if settings.get('format', ''):
        toformat = settings.get('format')
        index, typ = configM.get_mode_index(mode)
        msg = 'Unknown %s %s for mode %s' % ('format', toformat, mode)
        format_posts = configM.extra_src(mode, 'exportformat',
                                         helpers.notdefined(msg))
        lmf, err = format_posts(ans, lexicon, mode, toformat)
        return Response(lmf, mimetype='text/xml')

    else:
        return jsonify({lexicon: ans})
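
The export loop keeps only the newest version of each entry and then drops the removed ones; the same logic over hypothetical database rows:

rows = [
    {'id': 'a1', 'date': '2017-01-01', 'status': 'added', 'doc': {'v': 1}},
    {'id': 'a1', 'date': '2017-03-01', 'status': 'changed', 'doc': {'v': 2}},
    {'id': 'b2', 'date': '2017-02-01', 'status': 'removed', 'doc': {'v': 1}},
]
to_keep = {}
for entry in rows:
    _id = entry['id']
    # keep the entry with the latest date per id
    if _id not in to_keep or to_keep[_id]['date'] < entry['date']:
        to_keep[_id] = entry
ans = [val['doc'] for val in to_keep.values() if val['status'] != 'removed']
print(ans)  # [{'v': 2}]; only the latest non-removed version survives
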
Example #6
def querycount(page=0):
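    """ Returns the query answer together with its distribution over
        the lexicons """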
    # TODO error if buckets is used here
    # TODO validate_user is also done once in requestquery
    # but since we need the permitted dict, it is called
    # here as well
    auth, permitted = validate_user(mode="read")
    try:
        # TODO buckets should be gathered from some config
        default = {
            "buckets": ['lexiconOrder', 'lexiconName'],
            "size": configM.setupconfig['MAX_PAGE']
        }
        settings = parser.make_settings(permitted, default)
        q_ans = requestquery(page=page)
        # TODO does search_type=count work with the new es version?
        # if not, use query_then_fetch, size=0

        # raise the size for the statistics call
        stat_size = configM.setupconfig['MAX_PAGE']
        count_elasticq, more = parser.statistics(
            request.query_string,
            settings,
            order={"lexiconOrder": ("_term", "asc")},
            show_missing=False,
            force_size=stat_size)
        mode = settings['mode']
        es = configM.elastic(mode=mode)
        index, typ = configM.get_mode_index(mode)
        count_ans = es.search(
            index=index,
            body=loads(count_elasticq),
            search_type="count",
            # raise the size for the statistics call
            size=stat_size)
        distribution = count_ans['aggregations']['q_statistics'][
            'lexiconOrder']['buckets']
    except eh.KarpException as e:  # pass on karp exceptions
        logging.exception(e)
        raise

    except (elasticsearch.RequestError, elasticsearch.TransportError) as e:
        logging.exception(e)
        raise eh.KarpElasticSearchError(
            "ElasticSearch failure. Message: %s.\n" % e)

    except Exception as e:  # catch *all* exceptions
        # Remember that 'buckets' is not allowed here!
        logging.exception(e)
        raise eh.KarpQueryError("Could not parse data",
                                debug_msg=e,
                                query=request.query_string)
    return jsonify({'query': q_ans, 'distribution': distribution})
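
The distribution is dug out of a nested aggregation response; a sketch of the expected shape, with hypothetical buckets:

# hypothetical shape of count_ans as returned by es.search()
count_ans = {
    'aggregations': {
        'q_statistics': {
            'lexiconOrder': {
                'buckets': [{'key': 0, 'doc_count': 120},
                            {'key': 1, 'doc_count': 34}]
            }
        }
    }
}
distribution = count_ans['aggregations']['q_statistics']['lexiconOrder']['buckets']
print(distribution)  # one doc_count per lexicon
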
Example #7
def statlist():
    # TODO add is_more here (as above)
    """ Returns the counts and stats for the query """
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        logging.debug('mode is %s' % mode)
        default = {
            "buckets": configM.searchfield(mode, 'statistics_buckets'),
            "size": 100,
            "cardinality": False
        }
        settings = parser.make_settings(permitted, default)

        exclude = [] if auth else configM.searchfield(mode, 'secret_fields')
        elasticq, more = parser.statistics(query,
                                           settings,
                                           exclude=exclude,
                                           prefix='STAT_')
        es = configM.elastic(mode=settings['mode'])
        is_more = check_bucketsize(more, settings["size"], mode, es)
        # TODO allow more than 100 000 hits here?
        size = settings['size']
        index, typ = configM.get_mode_index(settings['mode'])
        ans = es.search(index=index,
                        body=loads(elasticq),
                        search_type="count",
                        size=size)
        tables = []
        for key, val in ans['aggregations']['q_statistics'].items():
            if key.startswith('STAT_'):
                tables.extend(generate_table(val, []))
        # the length of tables might be longer than size, so truncate it
        # generating shorter tables is not faster than generating all of it
        # and then truncating
        if size:
            tables = tables[:size]
        return jsonify({"stat_table": tables, "is_more": is_more})

    except eh.KarpException as e:  # pass on karp exceptions
        logging.exception(e)
        raise
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except Exception as e:  # catch *all* exceptions
        # raise
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
Example #8
def minientry():
    """ Returns the counts and stats for the query """
    max_page = configM.setupconfig['MINIENTRY_PAGE']
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        default = {
            'show': configM.searchfield(mode, 'minientry_fields'),
            'size': 25
        }
        settings = parser.make_settings(permitted, default)
        elasticq = parser.parse(query, settings)
        show = settings['show']
        if not auth:
            # show = show - exclude
            exclude = configM.searchfield(mode, 'secret_fields')
            show = list(set(show).difference(exclude))

        sort = sortorder(settings, mode, settings.get('query_command', ''))
        start = settings['start'] if 'start' in settings else 0
        es = configM.elastic(mode=settings['mode'])
        index, typ = configM.get_mode_index(settings['mode'])
        ans = parser.adapt_query(
            settings['size'], start, es, loads(elasticq), {
                'index': index,
                '_source': show,
                'from_': start,
                'sort': sort,
                'size': min(settings['size'], max_page),
                'search_type': 'dfs_query_then_fetch'
            })
        if settings.get('highlight', False):
            clean_highlight(ans)

        return jsonify(ans)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except PErr.QueryError as e:
        raise eh.KarpQueryError("Parse error, %s" % e.message,
                                debug_msg=e,
                                query=query)
    except eh.KarpException as e:  # pass on karp exceptions
        logging.exception(e)
        raise
    except Exception as e:  # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
Example #9
def add_multi_doc(lexicon, index=''):
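    """ Adds several documents to the lexicon in one bulk request """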
    import src.dbhandler.dbhandler as db

    data = helpers.read_data()
    documents = data.get('doc', '') or data.get('_source')
    message = data['message']
    es, index, typ = helpers.get_update_index(lexicon)
    # validate that the user may update the lexica
    authdict, permitted = auth.validate_user()
    if lexicon not in permitted:
        errstr = 'You are not allowed to modify the lexicon %s'
        raise eh.KarpAuthenticationError(errstr % lexicon, status_code=403)
    user = helpers.get_user()
    try:
        bulk, sql_bulk, ids = [], [], []
        ok = 0
        for doc in documents:
            doc['lexiconName'] = lexicon
            validate.validate_json(doc, lexicon)
            date = datetime.datetime.now()
            auto_update_document(doc, lexicon, 'add', user, date)
            bulk.append({'_index': index, '_type': typ, '_source': doc})

        # use a separate counter to avoid shadowing the ES index name
        for i, res in enumerate(eshelpers.streaming_bulk(es, bulk)):
            _id = res[1].get('create').get('_id')
            source = bulk[i].get('_source')
            if isinstance(source, dict):
                source = dumps(source)
            sql_bulk.append((_id, source, user, 'multi add - %s' % message,
                             lexicon, 'imported'))
            ids.append(_id)
            ok += 1

    except (esExceptions.RequestError, esExceptions.TransportError) as e:
        handle_update_error(e, data, user, 'add')
        raise eh.KarpElasticSearchError("Error during update. Message: %s.\n" %
                                        str(e))

    db_loaded, db_error = db.update_bulk(lexicon, sql_bulk)
    if db_error:
        logging.debug(db_error)

    jsonans = {
        'es_loaded': ok,
        'sql_loaded': db_loaded,
        'suggestion': False,
        'ids': ids
    }
    return jsonify(jsonans)
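
Each item yielded by eshelpers.streaming_bulk is an (ok, result) pair, where the result dict is keyed by the bulk action; the unpacking above, replayed on a hypothetical item:

# hypothetical item yielded by streaming_bulk for a 'create' action
res = (True, {'create': {'_id': 'AVxLK9', '_index': 'karp', 'status': 201}})
ok, result = res
_id = result.get('create').get('_id')
print('%s %s' % (ok, _id))
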
Example #10
def checksuggestions():
    from dbhandler.dbhandler import dbselect
    auth, permitted = validate_user()
    settings = {'allowed': permitted}
    helpers.get_querysettings(settings)
    size = settings.get('size', 50)
    lexicons = settings.get('resource', [])
    status = settings.get('status', ['waiting', 'rejected', 'accepted'])
    logging.debug('checksuggestions in %s' % lexicons)
    if not lexicons:
        return jsonify({'updates': []})
    updates = []
    for lexicon in lexicons:
        # add updates from lexicons that are kept in sql
        if configM.get_lexicon_sql(lexicon):
            updates.extend(dbselect(lexicon, suggestion=True, status=status,
                                    max_hits=size))

    return jsonify({'updates': updates})
def checkuserhistory():
    """ Shows the updates a user has made """
    try:
        auth, permitted = validate_user()
        user = helpers.get_user()
    except AttributeError:
        raise eh.KarpGeneralError('No user name provided', 'checkuserhistory')
    try:
        size = helpers.get_size(default=10, settings={'allowed': permitted})
        from src.dbhandler.dbhandler import dbselect
        updates = []
        for lexicon in permitted:
            # add updates from lexicons that are kept in sql
            if configM.get_lexicon_sql(lexicon):
                updates.extend(dbselect(lexicon, user=user, max_hits=size))

        return jsonify({'updates': updates})
    except Exception as e:
        logging.exception(e)
        raise eh.KarpGeneralError(str(e))
Example #12
def explain():
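    """ Returns the elastic query, its validation explanation and the
        query answer """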
    query = request.query_string
    auth, permitted = validate_user(mode="read")
    try:
        # default
        settings = parser.make_settings(permitted, {'size': 25, 'page': 0})
        elasticq = parser.parse(query, settings)
    except PErr.QueryError as e:
        raise eh.KarpQueryError("Parse error", debug_msg=e, query=query)
    es = configM.elastic(mode=settings['mode'])
    index, typ = configM.get_mode_index(settings['mode'])
    ex_ans = es.indices.validate_query(index=index,
                                       body=loads(elasticq),
                                       explain=True)
    q_ans = requestquery(page=0)
    return jsonify({
        'elastic_json_query': loads(elasticq),
        'ans': q_ans,
        'explain': ex_ans
    })
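
indices.validate_query with explain=True answers with a validity flag and, per index, the rewritten query; a hypothetical response shape (values invented):

ex_ans = {
    'valid': True,
    'explanations': [
        {'index': 'karp', 'valid': True, 'explanation': 'baseform:katt'}
    ]
}
print(ex_ans['valid'])
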
Example #13
def rejectsuggestion(lexicon, _id):
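    """ Rejects a suggestion: deletes it from the suggestion index and
        marks it as rejected in the db """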
    from dbhandler.dbhandler import dbselect
    try:
        origin = dbselect(lexicon, suggestion=True, _id=_id, max_hits=1)[0]
    except Exception as e:
        # if an error occurs here, the suggestion is not in sql
        raise eh.KarpDbError('Rejection not found',
                             'Rejection not found: %s' % str(e))
    auth, permitted = validate_user()
    set_lexicon = origin["doc"]["lexiconName"]
    helpers.check_lexiconName(lexicon, set_lexicon, 'rejectsuggestion', _id)
    if lexicon not in permitted:
        raise eh.KarpAuthenticationError('You are not allowed to update lexicon %s'
                                         % lexicon)
    try:
        origin = dbselect(lexicon, suggestion=True, _id=_id, max_hits=1)[0]
        # delete from suggestion index
        # the user login is checked in delete_entry
        # delete_entry raises an exception if ES fails
        sugg_index, typ = configM.get_lexicon_suggindex(lexicon)

        ans = update.delete_entry(lexicon, _id, sql=False, live=False,
                                  suggestion=True)
        request.get_data()
        data = loads(request.data)
        message = data.get('message')
        # mark as rejected
        ok, err = update.modify_db(_id, lexicon, message, "rejected")

        ans['sugg_db_loaded'] = ok
        if not ok:
            logging.debug(err)
        update.send_notification(origin['user'], message, _id, "rejected")
        return jsonify(ans)
    except (esExceptions.RequestError, esExceptions.TransportError) as e:
        update.handle_update_error(e, {"id": _id}, helpers.get_user(), 'reject')
        raise eh.KarpElasticSearchError("Error during update. Document not saved.",
                                        debug_msg=str(e))
    except Exception as e:
        update.handle_update_error(e, {"id": _id}, helpers.get_user(), 'reject')
        raise eh.KarpGeneralError(str(e))
Example #14
def savesuggestion(lexicon, _id, status='accepted', source=''):
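    """ Saves a suggestion to the lexicon index, marks its status in the
        db and removes it from the suggestion index """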
    from dbhandler.dbhandler import dbselect
    sugg_index, typ = configM.get_lexicon_suggindex(lexicon)
    es = configM.elastic(lexicon=lexicon)
    suggestion = es.get(index=sugg_index, id=_id)
    auth, permitted = validate_user()
    set_lexicon = suggestion["_source"]["lexiconName"]
    helpers.check_lexiconName(lexicon, set_lexicon, 'savesuggestion', _id)
    if lexicon not in permitted:
        raise eh.KarpAuthenticationError('You are not allowed to update lexicon %s'
                                         % lexicon)

    origin = dbselect(lexicon, suggestion=True, _id=_id, max_hits=1)[0]
    origid = origin['origid']
    request.get_data()
    data = loads(request.data)
    message = data.get('message')
    suggestion['message'] = message
    suggestion['version'] = origin['version']
    if not source:
        source = suggestion
    # the user login is checked in add_doc
    # add_doc raises an exception if ES fails
    if origid:
        # update in ES
        ans = update.update_doc(lexicon, origid, data=source, live=False)
    else:
        # add to ES
        ans = update.add_doc(lexicon, live=False, data=source)
        origid = ans.get('_id')
    # mark as accepted
    ok, err = update.modify_db(_id, lexicon, message, status, origid=origid)
    # delete from suggestion index
    suggans = update.delete_entry(lexicon, _id, sql=False, live=False,
                                  suggestion=True)
    ans['sugg_db_loaded'] = ok
    ans['sugg_es_ans'] = suggans
    if not ok:
        logging.debug(err)
    update.send_notification(origin['user'], message, _id, status)
    return ans
def checklexiconhistory(lexicon, date):
    """ Shows the updates on one lexicon """
    try:
        auth, permitted = validate_user()
        if lexicon not in permitted:
            raise eh.KarpAuthenticationError(
                'You are not allowed to update lexicon %s' % lexicon)
        settings = {"allowed": permitted}
        helpers.get_querysettings(settings)
        size = settings.get('size', 10)
        status = settings.get('status', ['added', 'changed', 'removed'])

        from src.dbhandler.dbhandler import dbselect
        return jsonify({
            'resource':
            lexicon,
            'updates':
            dbselect(lexicon, status=status, from_date=date, max_hits=size)
        })
    except Exception as e:
        raise eh.KarpGeneralError(str(e))
Example #16
def random():
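    """ Returns a random entry from the chosen mode """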
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        default = {
            "show": configM.searchfield(mode, 'minientry_fields'),
            "size": 1
        }
        settings = parser.make_settings(permitted, default)
        elasticq = parser.random(query, settings)
        es = configM.elastic(mode=mode)
        index, typ = configM.get_mode_index(mode)
        es_q = {
            'index': index,
            'body': loads(elasticq),
            'size': settings['size']
        }
        if settings['show']:
            show = settings['show']
            if not auth:
                # show = show - exclude
                exclude = configM.searchfield(mode, 'secret_fields')
                show = list(set(show).difference(exclude))
            es_q['_source'] = show

        ans = es.search(**es_q)
        return jsonify(ans)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except eh.KarpException as e:  # pass on karp exceptions
        logging.exception(e)
        raise
    except Exception as e:  # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
Example #17
def formatpost():
    """ Formats the posted data into wanted format
        The data should be a list
        Currently only working for saol
    """
    # get and parse data
    request.get_data()
    data = request.data
    try:
        data = loads(data)
    except ValueError as e:
        raise eh.KarpParsingError(str(e))

    # set all allowed lexicons (to avoid authentication exceptions)
    auth, permitted = validate_user(mode="read")
    # find the wanted format
    settings = parser.make_settings(permitted, {'size': 25})
    query = request.query_string
    parsed = parser.parse_qs(query)
    parser.parse_extra(parsed, settings)
    to_format = settings.get('format', '')
    mode = parser.get_mode(query)
    logging.debug('mode "%s"' % mode)
    index, typ = configM.get_mode_index(mode)

    if to_format:
        if not isinstance(data, list):
            data = [data]
        errmsg = 'Unknown format %s for mode %s' % (settings['format'], mode)
        format_list = configM.extra_src(mode, 'format_list',
                                        helpers.notdefined(errmsg))
        ok, html = format_list(data, configM.elastic(mode=mode),
                               settings['format'], index)
        return jsonify({'all': len(data), 'ok': ok, 'data': html})

    else:
        raise eh.KarpQueryError('No format specified')
Example #18
def statistics():
    """ Returns the counts and stats for the query """
    auth, permitted = validate_user(mode="read")
    try:
        query = request.query_string
        mode = parser.get_mode(query)
        default = {
            "buckets": configM.searchfield(mode, 'statistics_buckets'),
            "size": 100,
            "cardinality": False
        }
        settings = parser.make_settings(permitted, default)
        exclude = [] if auth else configM.searchfield(mode, 'secret_fields')

        elasticq, more = parser.statistics(query, settings, exclude=exclude)
        es = configM.elastic(mode=settings['mode'])
        is_more = check_bucketsize(more, settings, mode, es)

        index, typ = configM.get_mode_index(settings['mode'])
        # TODO allow more than 100 000 hits here?
        ans = es.search(index=index,
                        body=loads(elasticq),
                        search_type="count",
                        size=settings['size'])
        ans["is_more"] = is_more
        return jsonify(ans)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except eh.KarpException as e:  # pass on karp exceptions
        logging.exception(e)
        raise
    except Exception as e:  # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
Example #19
def get_context(lexicon):
    """ Find and return the alphabetically (or similar, as specified for the
    lexicon) context of a word/entry.
    """
    auth, permitted = validate_user(mode="read")
    if lexicon not in permitted:
        raise eh.KarpAuthenticationError('You are not allowed to search the '
                                         'lexicon %s' % lexicon)
    # make default settings
    settings = parser.make_settings(permitted, {
        "size": 10,
        "resource": lexicon
    })
    # parse querystring
    query = request.query_string
    parsed = parser.parse_qs(query)
    # parse parameter settings
    parser.parse_extra(parsed, settings)

    # set searching configurations
    mode = configM.get_lexicon_mode(lexicon)
    settings['mode'] = mode
    es = configM.elastic(mode=mode)
    index, typ = configM.get_mode_index(mode)

    # get the sort_by list (eg. ['baseform.sort', 'lemmaid.search'])
    # leave out lexiconOrder and _score
    sortfieldnames = [
        field for field in configM.searchconf(mode, 'sort_by')
        if field not in ['_score', 'lexiconOrder']
    ]
    # get the sort field paths (eg. ['FormRep.baseform.raw', 'lemmaid.raw'])
    # Used for sorting.
    sortfield = sum([F.lookup_multiple(f, mode) for f in sortfieldnames], [])
    # get the field name of the head sort field. Used for searching
    sortfieldname = sortfieldnames[0]

    # find the center entry (by its id)
    if 'center' in settings:
        center_id = settings['center']
        lexstart = es.search(index=index,
                             doc_type=typ,
                             size=1,
                             body={"query": {
                                 "term": {
                                     "_id": center_id
                                 }
                             }},
                             sort=['%s:asc' % f for f in sortfield])
    # if no center id is given, pick the first entry of the lexicon
    else:
        exps = []
        parser.parse_ext('and|resource|equals|%s' % lexicon, exps, [], mode)
        center_q = parser.search(exps, [], [], usefilter=True)
        lexstart = es.search(index=index,
                             doc_type=typ,
                             size=1,
                             body=center_q,
                             sort=['%s:asc' % f for f in sortfield])
        center_id = lexstart['hits']['hits'][0]['_id']

        # lexstart = es.search(index=index, doc_type=typ, size=1,
        #                      sort=['%s:asc' % f for f in sortfield])
        # center_id = lexstart['hits']['hits'][0]['_id']

    if not lexstart['hits']['hits']:
        logging.error('No center found %s, %s' % (center_id, lexstart))
        raise eh.KarpElasticSearchError("Could not find entry %s" % center_id)

    centerentry = lexstart['hits']['hits'][0]
    logging.debug('center %s, %s' % (centerentry, centerentry['_id']))
    origentry_sort = [key for key in centerentry['sort'] if key is not None][0]
    # TODO what to do if the sort key is not in the lexicon? as below?
    # origentry_sort = centerentry['sort'][0]
    sortvalue = control_escape(origentry_sort)
    logging.debug(u'Orig entry escaped key %s' % sortvalue)

    # Construct queries to es
    exps = []
    # the query string from the user
    querystring = settings.get('q', '').decode('utf8')
    parser.parse_ext('and|resource|equals|%s' % lexicon, exps, [], mode)
    if querystring:
        if querystring.startswith('simple'):
            querystring = 'and|anything|equals|%s' % querystring.split('|')[-1]
        else:
            querystring = re.sub(r'extended\|\|', '', querystring)
        parser.parse_ext(querystring, exps, [], mode)

    preexps = copy.deepcopy(exps)  # deep copy for the pre-query
    parser.parse_ext('and|%s|gte|%s' % (sortfieldname, sortvalue), exps, [],
                     mode)
    elasticq_post = parser.search(exps, [], [], usefilter=True)

    parser.parse_ext('and|%s|lte|%s' % (sortfieldname, sortvalue), preexps, [],
                     mode)
    elasticq_pre = parser.search(preexps, [], [], usefilter=True)

    # +1 to compensate for the word itself being in the context
    size = settings['size'] + 1
    show = configM.searchfield(mode, 'minientry_fields')
    # TODO size*3 (magic number) because many entries may have the same sort
    # value (eg homographs in saol)
    ans_pre = parser.adapt_query(
        size * 3, 0, es, elasticq_pre, {
            'size': size * 3,
            'from_': 0,
            'sort': ['%s:desc' % f for f in sortfield],
            'index': index,
            '_source': show,
            'search_type': 'dfs_query_then_fetch'
        })

    ans_post = parser.adapt_query(
        size * 3, 0, es, elasticq_post, {
            'size': size * 3,
            'from_': 0,
            'sort': ['%s:asc' % f for f in sortfield],
            'index': index,
            '_source': show,
            'search_type': 'dfs_query_then_fetch'
        })

    hits_pre = ans_pre.get('hits', {}).get('hits', [])
    hits_post = ans_post.get('hits', {}).get('hits', [])
    hits_pre = go_to_sortkey(hits_pre, origentry_sort, center_id)
    hits_post = go_to_sortkey(hits_post, origentry_sort, center_id)
    return jsonify({
        "pre": hits_pre[:settings['size']],
        "post": hits_post[:settings['size']],
        "center": centerentry
    })
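
The context is assembled from two sorted queries, one descending ('lte' the center's sort key) and one ascending ('gte'), each trimmed to the requested size; a toy version over a plain sorted list, with go_to_sortkey approximated by skipping the center itself:

entries = ['apa', 'bepa', 'cepa', 'depa', 'epa']  # already sorted
center = 'cepa'
size = 1
idx = entries.index(center)
pre = list(reversed(entries[:idx]))[:size]  # closest predecessors first
post = entries[idx + 1:][:size]
print({'pre': pre, 'center': center, 'post': post})
# {'pre': ['bepa'], 'center': 'cepa', 'post': ['depa']}
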
Example #20
def update_doc(lexicon, _id, data=None, live=True):
    """ Updates a posted document in the index 'index' with type 'typ'.
        The document must contain a field called 'doc' with
        the information to be sent.
        The field 'version' is optional; 'message' is required.
    """
    # send user name and password,
    # {'doc' : es_doc, 'version' : last version, 'message' : update message}
    authdict, permitted = auth.validate_user()

    if data is None:
        data = helpers.read_data()

    try:
        index, typ = configM.get_lexicon_index(lexicon)
        es = configM.elastic(lexicon=lexicon)
        origin = es.get(index=index, id=_id)
    except Exception as e:
        logging.warning("Looking for entry at the wrong place:")
        logging.exception(e)
        msg = "The entry %s in lexicon %s was not found" % (_id, lexicon)
        raise eh.KarpElasticSearchError(msg,
                                        debug_msg=msg + " in index " + index)

    lexiconName = origin['_source']['lexiconName']
    helpers.check_lexiconName(lexicon, lexiconName, _id, 'update')
    data_doc = data.get('doc') or data.get('_source')
    version = data.get('version')
    msg = data["message"]

    if lexicon not in permitted:
        raise eh.KarpAuthenticationError('You are not allowed to modify the '
                                         'lexicon %s, only %s' %
                                         (lexicon, permitted),
                                         status_code=403)

    # TODO validate data_doc, but this is so far sb specific!
    validate.validate_json(data_doc, lexicon)
    date = datetime.datetime.now()
    user = helpers.get_user()
    auto_update_document(data_doc, lexicon, 'update', user, date)
    try:
        if version is not None and version != -1:
            ans = es.index(index=index,
                           doc_type=typ,
                           id=_id,
                           version=version,
                           body=data_doc,
                           op_type='index')
        else:
            ans = es.index(index=index,
                           doc_type=typ,
                           id=_id,
                           body=data_doc,
                           op_type='index')

    except (esExceptions.RequestError, esExceptions.TransportError) as e:
        # Transport error might be version conflict
        logging.exception(e)
        logging.debug('index: %s, type: %s, id: %s' % (index, typ, _id))
        handle_update_error(e, {"id": _id, "data": data}, user, 'update')
        raise eh.KarpElasticSearchError("Error during update. Message: %s.\n" %
                                        str(e))
    except Exception as e:
        handle_update_error(e, {"id": _id, "data": data}, user, 'update')
        raise eh.KarpElasticSearchError("Unexpected error during update.")

    db_loaded, db_error = update_db(_id,
                                    data_doc,
                                    user,
                                    msg,
                                    lexiconName,
                                    status='changed',
                                    date=date)

    jsonans = {'es_loaded': 1, 'sql_loaded': db_loaded, 'es_ans': ans}
    if db_error:
        logging.debug(db_error)
    if live:
        return jsonify(jsonans)
    else:
        return jsonans
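
A sketch of the payload update_doc expects, with hypothetical values; 'doc' and 'message' are required by the code, while 'version' is optional and enables optimistic concurrency in es.index:

payload = {
    'doc': {'lexiconName': 'saldo', 'baseform': 'katt'},  # hypothetical entry
    'version': 3,
    'message': 'corrected baseform',
}
data_doc = payload.get('doc') or payload.get('_source')
version = payload.get('version')
msg = payload['message']
print('%s %s %s' % (data_doc, version, msg))
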
Example #21
def autocomplete():
    """ Returns lemgrams matching the query text.
        Each mode specifies in the configs which fields should be
        considered.
        The parameter 'q' or 'query' is used when only one word form is to be
        processed.
        The parameter 'multi' is used when multiple word forms should be
        processed.
        The format of the result depends on which flag is set.
    """
    auth, permitted = validate_user(mode="read")
    query = request.query_string
    try:
        settings = parser.make_settings(permitted, {'size': 1000})
        parsed = parser.parse_qs(query)
        mode = parser.get_mode(query)
        p_extra = parser.parse_extra(parsed, settings)
        qs = parsed.get('q', []) or parsed.get('query', [])
        multi = False
        if not qs:
            # check if there are multiple word forms to complete
            qs = settings.get('multi', [])
            logging.debug('qs %s' % qs)
            multi = True

        # use utf8, escape '"'
        qs = [re.sub('"', '\\"', q.decode('utf8')) for q in qs]

        headboost = configM.searchfield(mode, 'boosts')[0]
        res = {}
        ans = {}
        # if multi is not true, only one iteration of this loop will be done
        for q in qs:
            boost = '''"functions": [{"boost_factor" : "500",
                        "filter":{"term":{"%s":"%s"}}}]''' % (headboost, q)

            autocompleteq = configM.extra_src(mode, 'autocomplete',
                                              autocompletequery)
            exp = autocompleteq(mode, boost, q)
            autocomplete_field = configM.searchonefield(
                mode, 'autocomplete_field')
            fields = ['"exists": {"field" : "%s"}' % autocomplete_field]
            # last argument is the 'fields' used for highlighting
            # TODO use filter?
            elasticq = parser.search([exp] + p_extra,
                                     fields,
                                     '',
                                     usefilter=True)

            es = configM.elastic(mode=mode)
            logging.debug('_source: %s' % autocomplete_field)
            logging.debug(elasticq)
            index, typ = configM.get_mode_index(mode)
            ans = parser.adapt_query(
                settings['size'], 0, es, loads(elasticq), {
                    'size': settings['size'],
                    'index': index,
                    '_source': autocomplete_field
                })
            # save the results for multi
            res[q] = ans
        if multi:
            return jsonify(res)
        else:
            # single queries: only return the latest answer
            return jsonify(ans)
    except PErr.AuthenticationError as e:
        logging.exception(e)
        msg = e.message
        raise eh.KarpAuthenticationError(msg)
    except eh.KarpException as e:  # pass on karp exceptions
        logging.exception(e)
        raise
    except Exception as e:  # catch *all* exceptions
        logging.exception(e)
        raise eh.KarpGeneralError("Unknown error", debug_msg=e, query=query)
Example #22
def checkuser():
    """ Shows which lexica the user may edit """
    authdict, permitted = auth.validate_user(mode="verbose")
    return jsonify(permitted)
Example #23
def add_doc(lexicon,
            index='',
            _id=None,
            suggestion=False,
            data=None,
            live=True):
    """ Adds an entry to the index 'index' with type 'typ' in ES and sql.
        The post data must contain a field called 'doc' with the information to
        be sent.
        The field 'version' is optional; 'message' is required.
    """
    if not data:
        data = helpers.read_data()

    data_doc = data.get('doc', '') or data.get('_source')
    message = data['message']
    version = data.get('version', -1)
    es, index, typ = helpers.get_update_index(lexicon, suggestion=suggestion)
    lexiconName = lexicon or data_doc.get("lexiconName", '')
    helpers.check_lexiconName(lexicon, lexiconName, 'add', _id)

    # lexiconOrder = data_doc.get("lexiconOrder", None)
    if not lexiconName:
        raise eh.KarpParsingError("The field lexiconName is empty, "
                                  "although it is required.")

    if suggestion:
        origin_id = _id or True  # save as reference in db
        _id = ''  # add as new to the suggestion index
        status = 'waiting'
        user = data['user']
    else:
        # validate that the user may update the lexica
        authdict, permitted = auth.validate_user()
        if lexiconName not in permitted:
            errstr = 'You are not allowed to modify the lexicon %s'
            raise eh.KarpAuthenticationError(errstr % lexiconName,
                                             status_code=403)

        origin_id = ''  # not a suggestion
        user = helpers.get_user()
        status = 'added'
    try:
        # TODO validate data_doc, but this is so far sb specific!

        validate.validate_json(data_doc, lexicon)

        date = datetime.datetime.now()
        auto_update_document(data_doc, lexiconName, 'add', user, date)
        ans = es.index(index=index, doc_type=typ, body=data_doc, id=_id)
        _id = ans.get('_id')
        db_loaded, db_error = update_db(_id,
                                        data_doc,
                                        user,
                                        message,
                                        lexiconName,
                                        version=version,
                                        suggestion=origin_id,
                                        status=status,
                                        date=date)

    except (esExceptions.RequestError, esExceptions.TransportError) as e:
        handle_update_error(e, data, user, 'add')
        raise eh.KarpElasticSearchError("Error during update. Message: %s.\n" %
                                        str(e))
    except Exception as e:
        raise eh.KarpGeneralError(str(e))

    jsonans = {
        'es_loaded': 1,
        'sql_loaded': db_loaded,
        'es_ans': ans,
        'suggestion': suggestion,
        'id': _id
    }
    if db_error:
        logging.debug(db_error)
    if live:
        return jsonify(jsonans)
    else:
        return jsonans
Example #24
def delete_entry(lexicon, _id, sql=False, live=True, suggestion=False):
    # delete by id
    try:
        query = request.query_string
        parsed = parse_qs(query)
        msg = parsed.get('message', ['removed'])[0]
        es, index, typ = helpers.get_update_index(lexicon,
                                                  suggestion=suggestion)
        ans_entry = es.get(index=index, doc_type=typ, id=_id)
        lexiconName = ans_entry['_source']['lexiconName']
        helpers.check_lexiconName(lexicon, lexiconName, _id, 'delete')

        authdict, permitted = auth.validate_user()
        if lexiconName not in permitted:
            raise eh.KarpAuthenticationError('You are not allowed to modify '
                                             'the lexicon %s, only %s' %
                                             (lexiconName, permitted))

        # doc_type must be set
        ans = es.delete(index=index, doc_type=typ, id=_id)
        db_loaded, db_error = 0, ''
        if sql:
            # logging.debug("Delete " + msg)
            logging.debug('delete from sql.\nmsg %s\nans_entry %s' %
                          (msg, ans_entry))
            db_loaded, db_error = update_db(_id,
                                            ans_entry['_source'],
                                            helpers.get_user(),
                                            msg,
                                            lexiconName,
                                            status='removed')
            logging.debug('updated db %s %s' % (db_loaded, db_error))

        if db_error:
            raise eh.KarpDbError(db_error)

    except eshelpers.BulkIndexError as e:
        # BulkIndexException is thrown for other parse errors
        # This exception has errors instead of error
        handle_update_error(e, {"id": _id}, helpers.get_user(), 'delete')
        err = [er['create']['error'] for er in e.errors]
        raise eh.KarpElasticSearchError("Error during deletion %s.\n" %
                                        '\n'.join(err))

    except (esExceptions.TransportError, esExceptions.RequestError) as e:
        # elasticsearch-py throws TransportError both for invalid (empty)
        # objects and for objects that are not found
        handle_update_error(e, {"id": _id}, helpers.get_user(), 'delete')
        err = [e.error]
        raise eh.KarpElasticSearchError('Error during deletion. '
                                        'Message: %s.\n' % '\n'.join(err))

    except Exception as e:
        handle_update_error(e, {"id": _id}, helpers.get_user(), 'delete')
        err = ['Oops, an unpredicted error', str(e), 'Document not deleted']
        raise eh.KarpGeneralError('Document not deleted',
                                  debug_msg=' '.join(err))

    jsonans = {'es_loaded': 1, 'sql_loaded': db_loaded, 'es_ans': ans}
    if db_error:
        logging.debug(db_error)
    if live:
        return jsonify(jsonans)
    else:
        return jsonans