Beispiel #1
0
def get(last_update_time: int) -> Union[Response, ConnexionResponse]:
    """The GET method for the corpus list REST API. It provides metadata for all available texts."""
    # Look up when the CTS data was last modified on the server side.
    update_info: UpdateInfo = DatabaseService.query(
        UpdateInfo, filter_by=dict(resource_type=ResourceType.cts_data.name), first=True)
    # The client sends milliseconds; the stored timestamp is in seconds.
    if not update_info or update_info.last_modified_time < last_update_time / 1000:
        # Nothing changed since the client's last update: send an empty payload.
        return NetworkService.make_json_response(None)
    all_corpora: List[Corpus] = DatabaseService.query(Corpus)
    return NetworkService.make_json_response([corpus.to_dict() for corpus in all_corpora])
def update_exercises() -> Union[Response, ConnexionResponse]:
    """ Gets all static exercises from the frontend code repository and looks for the lemmata in them."""
    # TODO: check last update of the directory before pulling the whole zip archive
    response: Response = requests.get(Config.STATIC_EXERCISES_REPOSITORY_URL, stream=True)
    if not response.ok:
        # Repository unreachable: report temporary unavailability to the client.
        return connexion.problem(
            503, Config.ERROR_TITLE_SERVICE_UNAVAILABLE, Config.ERROR_MESSAGE_SERVICE_UNAVAILABLE)
    # Maps each exercise URL to the set of word strings found in it.
    relevant_strings_dict: Dict[str, Set[str]] = get_relevant_strings(response)
    # Maps temp file path -> OS-level file descriptor, one file per unique word.
    file_dict: Dict = {}
    lemma_set: Set[str] = set()
    for url in relevant_strings_dict:
        for word in relevant_strings_dict[url]:
            if word not in lemma_set:
                lemma_set.add(word)
                input_bytes = bytearray(word, encoding='utf-8', errors='strict')
                # NOTE(review): the descriptor is never closed here and the temp file
                # never removed — presumably AnnotationService.get_udpipe takes
                # ownership of the handles in file_dict; verify and clean up otherwise.
                file_handler, file_path = mkstemp()
                os.write(file_handler, input_bytes)
                file_dict[file_path] = file_handler
    # Annotate all collected words in a single UDpipe run; result is CONLL-style text.
    result_string: str = AnnotationService.get_udpipe("", False, file_dict)
    # Extract (form, lemma) pairs from CONLL lines whose token index is 1.
    search_results: List[Tuple[str, str]] = re.findall(r"1\t([a-zA-Z]*)\t([a-zA-Z]*)", result_string)
    # Map each word form to the index of its (form, lemma) pair; later duplicates win.
    search_results_dict: Dict[str, int] = {item[0]: i for (i, item) in enumerate(search_results)}
    for url in relevant_strings_dict:
        # the URN points to Cicero's letters to his brother Quintus, 1.1.8-1.1.10
        NetworkService.exercises[url] = StaticExercise(
            solutions=[], urn="urn:cts:latinLit:phi0474.phi058.perseus-lat1:1.1.8-1.1.10")
        for word in relevant_strings_dict[url]:
            # UDpipe cannot handle name abbreviations, so remove the punctuation and only keep the upper case letter
            if word[-1] in string.punctuation:
                word = word[:-1]
            NetworkService.exercises[url].solutions.append(list(search_results[search_results_dict[word]]))
    # Remember when the cache was refreshed so GET can decide whether to refresh again.
    NetworkService.exercises_last_update = datetime.fromtimestamp(time())
    return NetworkService.make_json_response(
        {x: NetworkService.exercises[x].to_dict() for x in NetworkService.exercises})
Beispiel #3
0
def post(file_data: dict) -> Response:
    """ The POST method for the File REST API.

    It writes learning results or HTML content to the disk for later access. """
    learning_result: str = file_data.get("learning_result", None)
    if not learning_result:
        # No learning result given: persist the submitted HTML content instead.
        file_type: FileType = file_data["file_type"]
        existing_file: DownloadableFile = FileService.make_tmp_file_from_html(
            file_data["urn"], file_type, file_data["html_content"])
        return NetworkService.make_json_response(existing_file.file_name)
    # Store one xAPI statement per exercise contained in the learning result.
    results_by_exercise: dict = json.loads(learning_result)
    for statement_dict in results_by_exercise.values():
        save_learning_result(XapiStatement(statement_dict))
    return NetworkService.make_json_response(str(True))
def post(network_data: dict) -> Response:
    """
    The POST method for the vector network REST API. It provides sentences whose content is similar to a given word.

    :param network_data: dict matching VectorNetworkForm (search_regex, nearest_neighbor_count).
    :return: JSON list of tokenized sentences, most similar first.
    """
    vnf: VectorNetworkForm = VectorNetworkForm.from_dict(network_data)
    nearest_neighbor_count = vnf.nearest_neighbor_count if vnf.nearest_neighbor_count else 10
    w2v: Word2Vec = Word2Vec.load(Config.PANEGYRICI_LATINI_MODEL_PATH)
    search_regex: Pattern[str] = re.compile(vnf.search_regex)
    # Average the vectors of all vocabulary entries that match the regex.
    keys: List[str] = [x for x in w2v.wv.vocab if search_regex.match(x)]
    relevant_vectors: List[ndarray] = [w2v.wv.get_vector(x) for x in keys]
    target_vector: ndarray = sum(relevant_vectors) / len(relevant_vectors)
    # was: open(...).readlines() with no close — use a context manager so the
    # file handle is released deterministically
    with open(Config.PANEGYRICI_LATINI_TEXT_PATH) as text_file:
        sentences: List[str] = text_file.readlines()
    # Average the token vectors of each non-empty sentence.
    sentence_vectors: Dict[int, ndarray] = {}
    for i, sentence in enumerate(sentences):
        toks: List[str] = sentence[:-1].split()
        if toks:
            vecs: List[ndarray] = [w2v.wv.get_vector(tok) for tok in toks]
            sentence_vectors[i] = sum(vecs) / len(vecs)
    # Cosine similarity between the target vector and every sentence vector.
    sims: List[Tuple[int, ndarray]] = []
    for key in sentence_vectors.keys():
        sims.append((key,
                     dot(matutils.unitvec(target_vector),
                         matutils.unitvec(sentence_vectors[key]))))
    sims.sort(key=lambda x: x[1], reverse=True)
    sims = sims[:nearest_neighbor_count]
    return NetworkService.make_json_response(
        [sentences[x[0]].split() for x in sims])
Beispiel #5
0
 def get(self):
     """ Returns matches from ANNIS for a given CTS URN and AQL. """
     # Pull the corpus identifier and the query out of the request.
     request_args: dict = flask.request.args
     target_urn: str = request_args["urn"]
     target_aql: str = request_args["aql"]
     matches = CorpusService.find_matches(target_urn, target_aql, is_csm=True)
     return NetworkService.make_json_response(matches)
def get() -> Union[Response, ConnexionResponse]:
    """ The GET method for the StaticExercises REST API. It provides a list of static exercises
    and their respective URLs in the frontend. """
    # TODO: WRITE AND READ LAST UPDATE TIME FROM THE DATABASE
    cache_is_stale: bool = datetime.fromtimestamp(
        time() - Config.INTERVAL_STATIC_EXERCISES) > NetworkService.exercises_last_update
    if cache_is_stale or not NetworkService.exercises:
        # Cache is outdated or empty: rebuild it and return the fresh data.
        return update_exercises()
    return NetworkService.make_json_response(
        {url: exercise.to_dict() for url, exercise in NetworkService.exercises.items()})
Beispiel #7
0
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """Provides the raw text for a requested text passage."""
    annis_response: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=False)
    if not annis_response.graph_data.nodes:
        # An empty node list means the passage could not be resolved.
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                                 Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    annis_response.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, urn, False, annis_response.graph_data).to_dict()
    return NetworkService.make_json_response(annis_response.to_dict())
Beispiel #8
0
def get(cid: int) -> Union[Response, ConnexionResponse]:
    """The GET method for the corpus REST API. It provides metadata for a specific text."""
    matched_corpus: Corpus = DatabaseService.query(
        Corpus, filter_by=dict(cid=cid), first=True)
    if matched_corpus:
        return NetworkService.make_json_response(matched_corpus.to_dict())
    # No corpus with this ID exists.
    return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                             Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
Beispiel #9
0
 def get(self):
     """ Returns graph data for a given CTS URN. """
     # Extract the target URN from the request parameters.
     request_args: Dict = flask.request.args
     requested_urn: str = request_args["urn"]
     annis_response: AnnisResponse = CorpusService.get_corpus(
         cts_urn=requested_urn, is_csm=True)
     if not annis_response.graph_data.nodes:
         # Nothing found for this URN.
         abort(404)
     return NetworkService.make_json_response(annis_response.to_dict())
def get(frequency_upper_bound: int, query_urn: str, vocabulary: str) -> Response:
    """ Retrieves sentence ID and matching degree for each sentence in the query text. """
    corpus_kind: VocabularyCorpus = VocabularyCorpus[vocabulary]
    known_words: Set[str] = FileService.get_vocabulary_set(corpus_kind, frequency_upper_bound)
    # punctuation should count as a match because we don't want to count this as part of the vocabulary
    known_words.update(string.punctuation)
    annis_response: AnnisResponse = CorpusService.get_corpus(cts_urn=query_urn, is_csm=False)
    matched_sentences: List[Sentence] = check_vocabulary(annis_response.graph_data, known_words)
    return NetworkService.make_json_response([sentence.to_dict() for sentence in matched_sentences])
def get(lang: str,
        frequency_upper_bound: int,
        last_update_time: int,
        vocabulary: str = ""):
    """The GET method for the exercise list REST API. It provides metadata for all available exercises.

    :param lang: ISO language code for the exercises; falls back to English if unknown.
    :param frequency_upper_bound: upper bound for lemma frequency when building the vocabulary set.
    :param last_update_time: client-side timestamp in milliseconds; an empty list is returned
        if the exercise list has not changed since then.
    :param vocabulary: name of a reference vocabulary; if valid, a matching degree is computed.
    """
    ui_exercises: UpdateInfo = DatabaseService.query(
        UpdateInfo,
        filter_by=dict(resource_type=ResourceType.exercise_list.name),
        first=True)
    # The client sends milliseconds; the stored timestamp is in seconds.
    if ui_exercises.last_modified_time < last_update_time / 1000:
        return NetworkService.make_json_response([])
    vocabulary_set: Set[str]
    try:
        vc: VocabularyCorpus = VocabularyCorpus[vocabulary]
        vocabulary_set = FileService.get_vocabulary_set(
            vc, frequency_upper_bound)
    except KeyError:
        # Unknown vocabulary name: skip the matching degree computation below.
        vocabulary_set = set()
    # was: the str parameter `lang` was shadowed by a `lang: Language`
    # re-annotation; use a distinct local name instead
    try:
        language: Language = Language(lang)
    except ValueError:
        language = Language.English
    exercises: List[Exercise] = DatabaseService.query(
        Exercise, filter_by=dict(language=language.value))
    matching_exercises: List[MatchingExercise] = [
        MatchingExercise.from_dict(x.to_dict()) for x in exercises
    ]
    if vocabulary_set:
        for exercise in matching_exercises:
            conll: List[TokenList] = conllu.parse(exercise.conll)
            lemmata: List[str] = [
                tok["lemma"] for sent in conll for tok in sent.tokens
            ]
            # Guard against empty CONLL data to avoid division by zero.
            if lemmata:
                matched: int = sum(1 for x in lemmata if x in vocabulary_set)
                exercise.matching_degree = matched / len(lemmata) * 100
    ret_val: List[dict] = [
        NetworkService.serialize_exercise(x, compress=True)
        for x in matching_exercises
    ]
    return NetworkService.make_json_response(ret_val)
Beispiel #12
0
def delete(cid: int) -> Union[Response, ConnexionResponse]:
    """The DELETE method for the corpus REST API. It deletes metadata for a specific text."""
    target_corpus: Corpus = DatabaseService.query(
        Corpus, filter_by=dict(cid=cid), first=True)
    if not target_corpus:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                                 Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    # Remove the record and persist the change.
    db.session.delete(target_corpus)
    DatabaseService.commit()
    return NetworkService.make_json_response(True)
Beispiel #13
0
def get(eid: str) -> Union[Response, ConnexionResponse]:
    """Looks up an exercise by ID and returns it together with the underlying corpus graph."""
    exercise: TExercise = DatabaseService.query(
        Exercise, filter_by=dict(eid=eid), first=True)
    if not exercise:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                                 Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
    annis_response: AnnisResponse = CorpusService.get_corpus(
        cts_urn=exercise.urn, is_csm=False)
    if not annis_response.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                                 Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    # Record the access so exercise usage can be tracked.
    exercise.last_access_time = datetime.utcnow().timestamp()
    DatabaseService.commit()
    annis_response.solutions = json.loads(exercise.solutions)
    annis_response.uri = NetworkService.get_exercise_uri(exercise)
    annis_response.exercise_id = exercise.eid
    annis_response.exercise_type = ExerciseType(exercise.exercise_type).value
    return NetworkService.make_json_response(annis_response.to_dict())
Beispiel #14
0
def patch(cid: int, **kwargs) -> Union[Response, ConnexionResponse]:
    """The PATCH method for the corpus REST API. It updates metadata for a specific text."""
    target_corpus: Corpus = DatabaseService.query(
        Corpus, filter_by=dict(cid=cid), first=True)
    if not target_corpus:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                                 Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    # Only overwrite attributes that were actually supplied.
    for attribute, new_value in kwargs.items():
        if new_value is not None:
            setattr(target_corpus, attribute, new_value)
    DatabaseService.commit()
    return NetworkService.make_json_response(target_corpus.to_dict())
Beispiel #15
0
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """The GET method for the valid references REST API. It provides references for the desired text."""
    try:
        # Custom corpora have their own reference lookup; everything else goes
        # through the standard CTS resolution.
        if CustomCorpusService.is_custom_corpus_urn(urn):
            reff: List[str] = CustomCorpusService.get_custom_corpus_reff(urn)
        else:
            reff = CorpusService.get_standard_corpus_reff(urn)
    except ValueError:
        # The URN is malformed.
        return connexion.problem(400, Config.ERROR_TITLE_BAD_REQUEST,
                                 Config.ERROR_MESSAGE_BAD_REQUEST)
    if not reff:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                                 Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    return NetworkService.make_json_response(reff)
def get(urn: str):
    """ Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
    frequency_items: List[FrequencyItem] = CorpusService.get_frequency_analysis(
        urn, is_csm=True)
    # map the abbreviated values found by ANNIS to our own model
    skip_set: Set[Phenomenon] = {Phenomenon.LEMMA, Phenomenon.DEPENDENCY}
    for item in frequency_items:
        for idx, abbreviated in enumerate(item.values):
            if item.phenomena[idx] in skip_set:
                continue
            value_map: Dict[str, List[str]] = AnnotationService.phenomenon_map[
                item.phenomena[idx]]
            # Replace the abbreviation with the key of the value list containing it.
            item.values[idx] = next(
                (full for full in value_map if abbreviated in value_map[full]), None)
    return NetworkService.make_json_response([item.to_dict() for item in frequency_items])
Beispiel #17
0
def post(exercise_data: dict) -> Union[Response, ConnexionResponse]:
    """The POST method for the exercise REST API: builds a new exercise from form data.

    Annotates the given text (or fetches it by URN), searches it for the requested
    phenomena and persists the resulting exercise.
    """
    ef: ExerciseForm = ExerciseForm.from_dict(exercise_data)
    ef.urn = ef.urn if ef.urn else ""
    exercise_type: ExerciseType = ExerciseType(ef.type)
    # Search values arrive as a JSON-encoded list of "phenomenon=value" strings.
    search_values_list: List[str] = json.loads(ef.search_values)
    aqls: List[str] = AnnotationService.map_search_values_to_aql(
        search_values_list=search_values_list, exercise_type=exercise_type)
    # Resolve the phenomenon part (before "=") of each search value to a
    # Phenomenon attribute by its uppercased name.
    search_phenomena: List[Phenomenon] = [
        Phenomenon().__getattribute__(x.split("=")[0].upper())
        for x in search_values_list
    ]
    # if there is custom text instead of a URN, immediately annotate it
    conll_string_or_urn: str = ef.urn if CorpusService.is_urn(
        ef.urn) else AnnotationService.get_udpipe(
            CorpusService.get_raw_text(ef.urn, False))
    try:
        # construct graph from CONLL data
        response: dict = get_graph_data(
            title=ef.urn,
            conll_string_or_urn=conll_string_or_urn,
            aqls=aqls,
            exercise_type=exercise_type,
            search_phenomena=search_phenomena)
    except ValueError:
        return connexion.problem(500, Config.ERROR_TITLE_INTERNAL_SERVER_ERROR,
                                 Config.ERROR_MESSAGE_INTERNAL_SERVER_ERROR)
    solutions_dict_list: List[Dict] = response["solutions"]
    solutions: List[Solution] = [
        Solution.from_dict(x) for x in solutions_dict_list
    ]
    # Persist the exercise and get back the response payload for the client.
    ar: AnnisResponse = make_new_exercise(
        conll=response["conll"],
        correct_feedback=ef.correct_feedback,
        exercise_type=ef.type,
        general_feedback=ef.general_feedback,
        graph_data_raw=response["graph_data_raw"],
        incorrect_feedback=ef.incorrect_feedback,
        instructions=ef.instructions,
        language=ef.language,
        partially_correct_feedback=ef.partially_correct_feedback,
        search_values=ef.search_values,
        solutions=solutions,
        type_translation=ef.type_translation,
        urn=ef.urn,
        work_author=ef.work_author,
        work_title=ef.work_title)
    return NetworkService.make_json_response(ar.to_dict())
def post(vocabulary_data: dict):
    """ Indicates for each token of a corpus whether it is covered by a reference vocabulary. """
    vocab_form: VocabularyForm = VocabularyForm.from_dict(vocabulary_data)
    corpus_kind: VocabularyCorpus = VocabularyCorpus[vocab_form.vocabulary]
    vocabulary_set: Set[str] = FileService.get_vocabulary_set(
        corpus_kind, vocab_form.frequency_upper_bound)
    # punctuation should count as a match because we don't want to count this as part of the vocabulary
    vocabulary_set.update(string.punctuation)
    corpus_response: AnnisResponse = CorpusService.get_corpus(
        cts_urn=vocab_form.query_urn, is_csm=False)
    # Flag every token whose lemma is not covered by the vocabulary.
    for node in corpus_response.graph_data.nodes:
        if not is_match(target_lemma=node.udep_lemma, vocabulary_set=vocabulary_set):
            node.is_oov = True
    ar: AnnisResponse = AnnisResponse(
        solutions=[], uri="", exercise_id="", graph_data=corpus_response.graph_data)
    ar.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, vocab_form.query_urn, False, ar.graph_data).to_dict()
    return NetworkService.make_json_response(ar.to_dict())
Beispiel #19
0
def get(eid: str, lang: str,
        solution_indices: List[int]) -> Union[Response, ConnexionResponse]:
    """ The GET method for the H5P REST API. It provides JSON templates for client-side H5P exercise layouts. """
    language: Language = determine_language(lang)
    exercise: Exercise = DatabaseService.query(
        Exercise, filter_by=dict(eid=eid), first=True)
    if not exercise:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND,
                                 Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
    text_field_content: str = get_text_field_content(exercise, solution_indices)
    if not text_field_content:
        # The exercise exists but no usable content could be built from it.
        return connexion.problem(422, Config.ERROR_TITLE_UNPROCESSABLE_ENTITY,
                                 Config.ERROR_MESSAGE_UNPROCESSABLE_ENTITY)
    # Start from the mark-words template and fill it for this exercise.
    template: dict = TextService.json_template_mark_words
    template = get_response(template, language,
                            TextService.json_template_drag_text, exercise,
                            text_field_content, TextService.feedback_template)
    return NetworkService.make_json_response(template)
Beispiel #20
0
 def post(self):
     """Given the relevant corpus data, gives back search results as graph data."""
     # Parse the request body manually; reject anything that is not valid JSON.
     try:
         args: dict = json.loads(flask.request.data.decode("utf-8"))
     except JSONDecodeError:
         abort(400)
     title: str = args["title"]
     annotations_or_urn: str = args["annotations"]
     aqls: List[str] = args["aqls"]
     exercise_type: ExerciseType = ExerciseType[args["exercise_type"]]
     # Resolve each requested phenomenon name to a Phenomenon attribute.
     search_phenomena: List[Phenomenon] = [
         Phenomenon().__getattribute__(name.upper())
         for name in args["search_phenomena"]
     ]
     conll: List[TokenList] = CorpusService.get_annotations_from_string(
         annotations_or_urn)
     results: dict = CorpusService.process_corpus_data(
         title, conll, aqls, exercise_type, search_phenomena)
     # serialize the results to json
     return NetworkService.make_json_response(results)
Beispiel #21
0
def post(kwic_data: dict) -> Response:
    """ The POST method for the KWIC REST API. It provides example contexts for a given phenomenon
    in a given corpus.

    :param kwic_data: dict matching KwicForm (urn, search_values, ctx_left, ctx_right).
    :return: JSON response containing the concatenated KWIC fragments.
    """
    kwic_form: KwicForm = KwicForm.from_dict(kwic_data)
    search_values_list: List[str] = json.loads(kwic_form.search_values)
    aqls: List[str] = AnnotationService.map_search_values_to_aql(
        search_values_list, ExerciseType.kwic)
    # Delegate the subgraph extraction to the corpus storage manager service.
    url: str = f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:{Config.CORPUS_STORAGE_MANAGER_PORT}{Config.SERVER_URI_CSM_SUBGRAPH}"
    data: str = json.dumps(
        dict(urn=kwic_data["urn"],
             aqls=aqls,
             ctx_left=str(kwic_form.ctx_left),
             ctx_right=str(kwic_form.ctx_right)))
    response: requests.Response = requests.post(url, data=data)
    response_content: List[dict] = json.loads(response.text)
    exercise_data_list: List[ExerciseData] = [
        ExerciseData(json_dict=x) for x in response_content
    ]
    # was: index-based loop accumulating with string += (quadratic); join the
    # per-exercise fragments instead
    ret_val: str = "".join(
        handle_exercise_data(exercise_data, kwic_form.ctx_left, kwic_form.ctx_right)
        for exercise_data in exercise_data_list)
    return NetworkService.make_json_response(ret_val)
def get(search_regex: str, highlight_regex: str, min_count: int,
        nearest_neighbor_count: int) -> Response:
    """The GET method for the vector network REST API. It provides network data for the vectors in an AI model."""
    # Build the concept network and hand it back as JSON.
    network: str = get_concept_network(
        search_regex, min_count, highlight_regex, nearest_neighbor_count)
    return NetworkService.make_json_response(network)
def get(measure: str, urn: str):
    """Gives users measures of text complexity for a given text."""
    annis_response: AnnisResponse = CorpusService.get_corpus(urn, is_csm=False)
    complexity: TextComplexity = TextComplexityService.text_complexity(
        measure, urn, False, annis_response.graph_data)
    return NetworkService.make_json_response(complexity.to_dict())
def get(urn: str):
    """ Returns results for a frequency query from ANNIS for a given CTS URN and AQL. """
    # Delegate the frequency analysis to the corpus storage manager service.
    target_url: str = (f"{Config.INTERNET_PROTOCOL}{Config.HOST_IP_CSM}:"
                       f"{Config.CORPUS_STORAGE_MANAGER_PORT}{Config.SERVER_URI_FREQUENCY}")
    response: requests.Response = requests.get(target_url, params=dict(urn=urn))
    return NetworkService.make_json_response(json.loads(response.text))