from flask import jsonify, make_response

# get_corpus_or_404(), get_string_content(), and parse_tokens() are
# module-local helpers (not shown here).


def tokenize(corpus_name):
    """
    POST /{corpus}/tokenize
    GET  /{corpus}/tokenize?s=...

    Tokenize the given string for this corpus's language.
    """
    corpus = get_corpus_or_404(corpus_name)
    # Args... should be a file or string
    content = get_string_content()
    return jsonify(tokens=corpus.tokenize(content, mid_line=False))
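# A hypothetical client sketch, not part of the service: one way to exercise
# the tokenize endpoint above through its GET form. The base URL and the
# "python" corpus name are illustrative assumptions.
def _example_tokenize_request():
    import requests

    resp = requests.get(
        "http://localhost:5000/python/tokenize",
        params={"s": "print('hello, world')"},
    )
    return resp.json()["tokens"]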
def cross_entropy(corpus_name):
    """
    POST /{corpus}/xentropy/

    Calculate the cross-entropy of the uploaded file with respect to the
    corpus.
    """
    corpus = get_corpus_or_404(corpus_name)
    content = get_string_content()
    tokens = corpus.tokenize(content)
    return jsonify(cross_entropy=corpus.cross_entropy(tokens))
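# A hypothetical client sketch for the cross-entropy endpoint above. The base
# URL, the corpus name, and the "f" form-field name (suggested by the predict
# route's docstring) are all assumptions.
def _example_cross_entropy_request():
    import requests

    with open("example.py", "rb") as source_file:
        resp = requests.post(
            "http://localhost:5000/python/xentropy/",
            files={"f": source_file},
        )
    return resp.json()["cross_entropy"]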
def train(corpus_name):
    """
    POST /{corpus}/

    Upload a file for training.
    """
    corpus = get_corpus_or_404(corpus_name)
    content = get_string_content()
    tokens = corpus.tokenize(content)
    # NOTE: train doesn't really have a useful return...
    corpus.train(tokens)
    return make_response(jsonify(tokens=len(tokens)), 202)
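# A hypothetical client sketch for the training endpoint above; same
# assumptions as the previous examples. Since training has no useful return
# value, the handler replies 202 Accepted with the number of tokens ingested.
def _example_train_request():
    import requests

    with open("example.py", "rb") as source_file:
        resp = requests.post(
            "http://localhost:5000/python/",
            files={"f": source_file},
        )
    return resp.status_code, resp.json()["tokens"]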
def predict(corpus_name, token_str=""): """ POST /{corpus}/predict/{tokens*} POST /{corpus}/predict/f=? Returns a number of suggestions for the given token prefix. """ corpus = get_corpus_or_404(corpus_name) if token_str: tokens = parse_tokens(token_str) else: tokens = corpus.tokenize(get_string_content()) # Predict returns a nice, JSONable dictionary, so just return that. return jsonify(corpus.predict(tokens))