Esempio n. 1
0
korpplugins.lemgramcompleter

A Korp plugin implementing the /lemgram_complete endpoint, which finds
lemgram completions for a prefix.
"""

# TODO:
# - Add docstrings and expand existing ones.
# - Add parameters affecting the result.

import pylibmc

import korppluginlib

# Plugin configuration: the keyword arguments below are the configuration
# variables and values passed to korppluginlib.get_plugin_config
pluginconf = korppluginlib.get_plugin_config(
    # Name of the MySQL database table containing the lemgram index
    LEMGRAM_DBTABLE="lemgram_index",
)

# The endpoint plugin instance whose route() decorator registers the
# endpoint function defined in this module
plugin = korppluginlib.KorpEndpointPlugin()


@plugin.route("/lemgram_complete", extra_decorators=["prevent_timeout"])
def lemgram_complete(args):
    """Find lemgrams beginning with the specified prefix.

    Find lemgram completions beginning with the specified prefix, based
    on the database table lemgram_index. If corpus ids are specified,
    prefer lemgrams from those corpora and fill in from the rest. The
    result is sorted descending by the frequency of the lemgram.

    Arguments:
Esempio n. 2
0
# See config.py.template for further documentation of the configuration
# variables
pluginconf = korppluginlib.get_plugin_config(
    # All MySQL connection parameters as a dict; if non-empty, overrides the
    # individual DBCONN_* values
    DBCONN_PARAMS={},
    # MySQL connection parameters as individual values
    DBCONN_HOST="localhost",
    DBCONN_PORT=3306,
    # DBCONN_UNIX_SOCKET should be commented-out unless using a non-default
    # socket for connecting
    # DBCONN_UNIX_SOCKET = ""
    DBCONN_DB="korp_auth",
    # NOTE(review): "******" looks like a masked placeholder -- confirm that
    # the real user name is supplied by the site configuration
    DBCONN_USER="******",
    DBCONN_PASSWD="",
    DBCONN_USE_UNICODE=True,
    DBCONN_CHARSET="utf8mb4",
    # The name of the table containing licence information, to be filled in
    # for the {LICENCE_TABLE} placeholder of LIST_PROTECTED_CORPORA_SQL
    LICENCE_TABLE="auth_license",
    # SQL statement to list protected corpora: as written, it selects the
    # corpora whose license value does not begin with "PUB"
    LIST_PROTECTED_CORPORA_SQL="""
        SELECT corpus FROM {LICENCE_TABLE}
        WHERE NOT license LIKE 'PUB%'
    """,
    # Whether to keep the database connection persistent or close after each
    # call of filter_protected_corpora
    PERSISTENT_DB_CONNECTION=True,
)

Esempio n. 3
0
pluginconf = korppluginlib.get_plugin_config(
    # Base directory of the log files
    LOG_BASEDIR="/v/korp/log/korp-py",
    # Format string (for str.format()) of the log file name; the value
    # below begins with a date-named subdirectory
    LOG_FILENAME_FORMAT=(
        "{year}{mon:02}{mday:02}/"
        "korp-{year}{mon:02}{mday:02}_{hour:02}{min:02}{sec:02}-{pid:06}.log"
    ),
    # Log level used by default
    LOG_LEVEL=logging.INFO,
    # If True, "debug=true" in the query parameters of the HTTP request
    # changes the log level to logging.DEBUG
    LOG_ENABLE_DEBUG_PARAM=True,
    # Format string of a log message, in the percent formatting of
    # logging.Formatter
    LOG_FORMAT=(
        "[korp.py %(levelname)s %(process)d:%(starttime_us)d"
        " @ %(asctime)s] %(message)s"
    ),
    # Maximum length of a log message, the fixed part included; 0 means
    # unlimited
    LOG_MESSAGE_DEFAULT_MAX_LEN=100000,
    # Text inserted at the point where a log message is truncated to the
    # maximum length
    LOG_MESSAGE_TRUNCATE_TEXT="[[...CUT...]]",
    # Position at which to truncate a log message exceeding the maximum
    # length: a positive value keeps that many characters from the
    # beginning, a negative one from the end
    LOG_MESSAGE_TRUNCATE_POS=-100,
    # Categories of information to be logged; all the available ones are
    # listed here
    LOG_CATEGORIES=[
        "auth", "debug", "env", "load", "memory", "params",
        "referrer", "result", "rusage", "times", "userinfo",
    ],
    # Individual log items to be excluded from logging
    LOG_EXCLUDE_ITEMS=[],
)
Esempio n. 4
0
"""
korpplugins.test1

Korp test plugin for an object- and Blueprint-based plugin proposal: endpoint
/test and a result wrapper in a package with a separate configuration module.
"""


import functools

import korppluginlib


# Configuration of the test plugin: the names and values below are passed
# to korppluginlib.get_plugin_config as keyword arguments
pluginconf = korppluginlib.get_plugin_config(
    ARGS_NAME="args_default",
    WRAP_NAME="wrap_default",
)


# Information on this plugin: its name, version and date
# NOTE(review): presumably picked up by korppluginlib when the plugin is
# loaded -- confirm against korppluginlib
PLUGIN_INFO = {
    "name": "korppluginlib test plugin 1",
    "version": "0.1",
    "date": "2020-12-10",
}


# The endpoint plugin instance of this module; its endpoint_decorator
# attribute is used as a decorator further below
test_plugin = korppluginlib.KorpEndpointPlugin()


@test_plugin.endpoint_decorator
def test_decor(generator):
Esempio n. 5
0
Assumes that corpora contain encoded special characters that would not
otherwise be handled correctly (because of limitations of CWB): space,
slash, less than, greater than and vertical bar. These characters are
encoded in CQP queries and decoded in query results.
"""

import re

import korppluginlib

# See config.py.template for further documentation of the configuration
# variables
pluginconf = korppluginlib.get_plugin_config(
    # Special characters encoded: space, slash, less-than sign, greater-than
    # sign and vertical bar
    SPECIAL_CHARS=" /<>|",
    # The character for encoding the first character in SPECIAL_CHARS
    # NOTE(review): the other special characters presumably get the
    # subsequent code points -- confirm in the transcoding code
    ENCODED_SPECIAL_CHAR_OFFSET=0x7F,
    # Prefix for the encoded form of special characters (empty here)
    ENCODED_SPECIAL_CHAR_PREFIX="",
)

# The following constants and functions would logically belong to the class
# SpecialCharacterTranscoder, but they do not use its state, so defining them
# as independent functions avoids having to pass self to them.

# Special characters in CQP regular expressions that need to be escaped with a
# backslash to match literally. If they are not preceded by a backslash, they
# should not be replaced in queries. (Of these, only the vertical bar is also
# in the SPECIAL_CHARS value above.)
_CQP_REGEX_SPECIAL_CHARS = "()|[].*+?{}^$"
# The character with which to replace literal backslashes escaped by another
# backslash, so that a regex metacharacter preceded by such will not be
# replaced. The literal backslashes are replaced before other replacements and
Esempio n. 6
0
# - Allow completely removing hidden structures from the result (configurable)
# - Allow hiding results from hidden structures in statistics, probably by
#   adding a term to the CQP query (configurable)
# - Hide hidden structure names from the results of corpus_info

import korppluginlib

# See config.py.template for further documentation of the configuration
# variables
pluginconf = korppluginlib.get_plugin_config(
    # Structural attribute (annotation) names marking a structure as hidden and
    # not to be shown to the user in query results; if the list is empty,
    # QueryContentHider.applies_to is false for every request
    HIDDEN_STRUCT_NAMES=["text__removed"],
    # The value with which to replace positional attribute values within
    # structures marked as hidden in query results
    HIDDEN_VALUE_POS_ATTR="_",
    # The value with which to replace structural attribute annotation values
    # within structures marked as hidden in query results
    HIDDEN_VALUE_STRUCT_ATTR="removed",
    # Set the match position to 0 in query results within hidden structures
    HIDE_MATCH_POS=True,
)


class QueryContentHider(korppluginlib.KorpCallbackPlugin):
    """Callback plugin class for hiding the content of marked query results"""
    def applies_to(self, request):
        """Tell whether content hiding should be applied to `request`.

        Hiding applies only to KWIC results: at least one hidden
        structure name has to be configured in
        pluginconf.HIDDEN_STRUCT_NAMES and the endpoint of the request
        has to be "query" or "relations_sentences".

        Arguments:
            request: the request object; only its `endpoint` attribute
                is read here

        Returns:
            bool: True if hiding should be applied, otherwise False
        """
        # `and` evaluates to its first operand when that is falsy, so
        # without bool() an empty HIDDEN_STRUCT_NAMES list would itself be
        # returned to the caller instead of a truth value
        return bool(pluginconf.HIDDEN_STRUCT_NAMES
                    and request.endpoint in ("query", "relations_sentences"))