korpplugins.lemgramcompleter A Korp plugin implementing /lemgram_complete endpoint, to find lemgram completions for a prefix. """ # TODO: # - Add docstrings and expand existing ones. # - Add parameters affecting the result. import pylibmc import korppluginlib pluginconf = korppluginlib.get_plugin_config( # The name of the lemgram index table in the MySQL database LEMGRAM_DBTABLE="lemgram_index", ) plugin = korppluginlib.KorpEndpointPlugin() @plugin.route("/lemgram_complete", extra_decorators=["prevent_timeout"]) def lemgram_complete(args): """Find lemgrams beginning with the specified prefix. Find lemgram completions beginning with the specified prefix, based on the database table lemgram_index. If corpus ids are specified, prefer lemgrams from those corpora and fill in from the rest. The result is sorted descending by the frequency of the lemgram. Arguments:
# The configuration variables are documented in more detail in
# config.py.template.
pluginconf = korppluginlib.get_plugin_config(
    # Complete set of MySQL connection parameters as a dict; a non-empty
    # value overrides the individual DBCONN_* values below
    DBCONN_PARAMS={},
    # MySQL connection parameters given as individual values
    DBCONN_HOST="localhost",
    DBCONN_PORT=3306,
    # Keep DBCONN_UNIX_SOCKET commented out unless a non-default socket is
    # used for connecting
    # DBCONN_UNIX_SOCKET = ""
    DBCONN_DB="korp_auth",
    DBCONN_USER="******",
    DBCONN_PASSWD="",
    DBCONN_USE_UNICODE=True,
    DBCONN_CHARSET="utf8mb4",
    # Name of the table holding licence information; it is filled in to the
    # {LICENCE_TABLE} placeholder of LIST_PROTECTED_CORPORA_SQL
    LICENCE_TABLE="auth_license",
    # SQL statement listing the protected corpora
    LIST_PROTECTED_CORPORA_SQL=(
        """ SELECT corpus FROM {LICENCE_TABLE} WHERE NOT license LIKE 'PUB%' """
    ),
    # True keeps the database connection persistent; False closes it after
    # each call of filter_protected_corpora
    PERSISTENT_DB_CONNECTION=True,
)
pluginconf = korppluginlib.get_plugin_config(
    # Base directory under which the log files are placed
    LOG_BASEDIR="/v/korp/log/korp-py",
    # str.format() format string for the log file name
    LOG_FILENAME_FORMAT=(
        "{year}{mon:02}{mday:02}/korp-{year}{mon:02}{mday:02}"
        "_{hour:02}{min:02}{sec:02}-{pid:06}.log"
    ),
    # Log level used by default
    LOG_LEVEL=logging.INFO,
    # If True, the log level is changed to logging.DEBUG when the query
    # parameters of the HTTP request contain "debug=true"
    LOG_ENABLE_DEBUG_PARAM=True,
    # Log message format for logging.Formatter, using percent formatting
    LOG_FORMAT=(
        "[korp.py %(levelname)s %(process)d:%(starttime_us)d @ %(asctime)s]"
        " %(message)s"
    ),
    # Maximum length of a log message, the fixed part included; 0 means
    # unlimited
    LOG_MESSAGE_DEFAULT_MAX_LEN=100000,
    # Text inserted at the point where a log message is truncated to the
    # maximum length
    LOG_MESSAGE_TRUNCATE_TEXT="[[...CUT...]]",
    # Position at which a log message longer than the maximum length is
    # truncated: a positive value keeps that many characters from the
    # beginning, a negative value from the end
    LOG_MESSAGE_TRUNCATE_POS=-100,
    # Categories of information to be logged; all the available ones are
    # listed here
    LOG_CATEGORIES=[
        "auth",
        "debug",
        "env",
        "load",
        "memory",
        "params",
        "referrer",
        "result",
        "rusage",
        "times",
        "userinfo",
    ],
    # Individual log items to be excluded from logging
    LOG_EXCLUDE_ITEMS=[],
)
""" korpplugins.test1 Korp test plugin for an object- and Blueprint-based plugin proposal: endpoint /test and a result wrapper in a package with a separate configuration module. """ import functools import korppluginlib pluginconf = korppluginlib.get_plugin_config( ARGS_NAME = "args_default", WRAP_NAME = "wrap_default", ) PLUGIN_INFO = { "name": "korppluginlib test plugin 1", "version": "0.1", "date": "2020-12-10", } test_plugin = korppluginlib.KorpEndpointPlugin() @test_plugin.endpoint_decorator def test_decor(generator):
Assumes that corpora contain encoded special characters that would not otherwise be handled correctly (because of limitations of CWB): space, slash, lesser than, greater than. These characters are encoded in CQP queries and decoded in query results. """ import re import korppluginlib # See config.py.template for further documentation of the configuration # variables pluginconf = korppluginlib.get_plugin_config( # Special characters encoded SPECIAL_CHARS=" /<>|", # The character for encoding the first character in SPECIAL_CHARS ENCODED_SPECIAL_CHAR_OFFSET=0x7F, # Prefix for the encoded form of special characters ENCODED_SPECIAL_CHAR_PREFIX="", ) # The following constants and functions would logically belong to the class # SpecialCharacterTranscoder, but they do not use its state, so defining them # as independent functions avoids having to pass self to them. # Special characters in CQP regular expressions that need to be escaped with a # backslash to match literally. If they are not preceded with a backslash, they # should not be replaced in queries. _CQP_REGEX_SPECIAL_CHARS = "()|[].*+?{}^$" # The character with which to replace literal backslashes escaped by another # backslash, so that a regex metacharacter preceded by such will not be # replaced. The literal backslashes are replaced before other replacements and
# - Allow completely removing hidden structures from the result (configurable)
# - Allow hiding results from hidden structures in statistics, probably by
#   adding a term to the CQP query (configurable)
# - Hide hidden structure names from the results of corpus_info


import korppluginlib


# The configuration variables are documented in more detail in
# config.py.template.
pluginconf = korppluginlib.get_plugin_config(
    # Names of the structural attributes (annotations) that mark a structure
    # as hidden, so that it is not shown to the user in query results
    HIDDEN_STRUCT_NAMES=["text__removed"],
    # Replacement for positional attribute values within structures marked
    # as hidden in query results
    HIDDEN_VALUE_POS_ATTR="_",
    # Replacement for structural attribute annotation values within
    # structures marked as hidden in query results
    HIDDEN_VALUE_STRUCT_ATTR="removed",
    # Whether to set the match position to 0 in query results within hidden
    # structures
    HIDE_MATCH_POS=True,
)


class QueryContentHider(korppluginlib.KorpCallbackPlugin):

    """Callback plugin class hiding the content of marked query results."""

    def applies_to(self, request):
        """Tell whether hiding is to be applied to `request`.

        Hiding concerns only KWIC results: the result is true only if
        HIDDEN_STRUCT_NAMES is non-empty and the endpoint of the request
        is "query" or "relations_sentences".
        """
        hidden_names = pluginconf.HIDDEN_STRUCT_NAMES
        if not hidden_names:
            # No structure names are configured as hidden: pass on the
            # falsy configuration value as is
            return hidden_names
        return request.endpoint in ("query", "relations_sentences")