def how_many_infinitives(urn: str, is_csm: bool) -> int:
    """ Gives back the number of infinitives in the text. """
    # infinitives that govern a subject in the accusative case
    aql: str = 'feats=/.*Inf.*/ ->dep[deprel=/(nsubj|nsubj:pass|csubj|csubj:pass)/] feats=/.*Acc.*/'
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # accusative tokens that govern an infinitive complement
    aql = 'feats=/.*Acc.*/ ->dep[deprel=/(xcomp|ccomp)/] feats=/.*Inf.*/'
    node_ids += CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # every match consists of two node IDs, so halve the count
    return round(len(node_ids) / 2)
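# The two queries above return two node IDs per hit (one per token in the
# dependency pair), hence the halving. The same idiom recurs in several
# counters below; a minimal standalone sketch (hypothetical helper, not part
# of the original service, assuming find_matches returns one ID per matched
# token):
def count_pair_matches(urn: str, aql: str, is_csm: bool) -> int:
    """Counts hits for a two-node AQL pattern."""
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # two node IDs per hit, so halve the count
    return round(len(node_ids) / 2)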
def how_many_ablativi_absoluti(urn: str, is_csm: bool) -> int:
    """ Gives back the number of ablativi absoluti in the text. """
    # a subject in the ablative case signals an ablativus absolutus
    aql: str = "tok ->dep[deprel=/(nsubj|nsubj:pass|csubj|csubj:pass)/] feats=/.*Abl.*/"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # every match consists of two node IDs, so halve the count
    return round(len(node_ids) / 2)
def how_many_participles(urn: str, is_csm: bool) -> int:
    """Gives back how many participles are in the text."""
    aql: str = "feats=/.*VerbForm=Part.*/"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    return len(node_ids)
def post(exercise_data: dict) -> Union[Response, ConnexionResponse]:
    """Creates a new exercise from the given data and returns it, including the relevant graph data."""
    ef: ExerciseForm = ExerciseForm.from_dict(exercise_data)
    ef.urn = ef.urn if ef.urn else ""
    exercise_type: ExerciseType = ExerciseType(ef.type)
    search_values_list: List[str] = json.loads(ef.search_values)
    aqls: List[str] = AnnotationService.map_search_values_to_aql(
        search_values_list=search_values_list, exercise_type=exercise_type)
    search_phenomena: List[Phenomenon] = [
        Phenomenon().__getattribute__(x.split("=")[0].upper()) for x in search_values_list]
    # if there is custom text instead of a URN, annotate it immediately
    conll_string_or_urn: str = ef.urn if CorpusService.is_urn(ef.urn) \
        else AnnotationService.get_udpipe(CorpusService.get_raw_text(ef.urn, False))
    try:
        # construct a graph from the CoNLL data
        response: dict = get_graph_data(
            title=ef.urn, conll_string_or_urn=conll_string_or_urn, aqls=aqls,
            exercise_type=exercise_type, search_phenomena=search_phenomena)
    except ValueError:
        return connexion.problem(500, Config.ERROR_TITLE_INTERNAL_SERVER_ERROR,
                                 Config.ERROR_MESSAGE_INTERNAL_SERVER_ERROR)
    solutions_dict_list: List[Dict] = response["solutions"]
    solutions: List[Solution] = [Solution.from_dict(x) for x in solutions_dict_list]
    ar: AnnisResponse = make_new_exercise(
        conll=response["conll"], correct_feedback=ef.correct_feedback, exercise_type=ef.type,
        general_feedback=ef.general_feedback, graph_data_raw=response["graph_data_raw"],
        incorrect_feedback=ef.incorrect_feedback, instructions=ef.instructions,
        language=ef.language, partially_correct_feedback=ef.partially_correct_feedback,
        search_values=ef.search_values, solutions=solutions,
        type_translation=ef.type_translation, urn=ef.urn,
        work_author=ef.work_author, work_title=ef.work_title)
    return NetworkService.make_json_response(ar.to_dict())
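# For orientation, a hedged sketch of the payload this handler consumes. The
# keys mirror the ExerciseForm attributes accessed above; all values are
# made-up placeholders, and the exact set of required fields depends on the
# ExerciseForm schema:
EXAMPLE_EXERCISE_DATA: dict = {
    "urn": "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1",  # placeholder URN
    "type": "markWords",  # assumption: a valid ExerciseType value
    "search_values": '["upostag=VERB"]',  # JSON-encoded list, parsed via json.loads above
    "instructions": "Mark all verbs in the passage.",
    "general_feedback": "",
    "correct_feedback": "",
    "incorrect_feedback": "",
    "partially_correct_feedback": "",
    "language": "de",
    "type_translation": "",
    "work_author": "",
    "work_title": "",
}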
def how_many_sub_clauses(urn: str, is_csm: bool) -> int:
    """ Gives back the number of subordinate clauses in the text. """
    aql: str = 'tok ->dep[deprel=/(acl|advcl|ccomp|xcomp)/] upostag="VERB"'
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # TODO: account for the degree of subordination; elliptical clauses are not counted
    # every match consists of two node IDs, so halve the count
    return round(len(node_ids) / 2)
def how_many_main_clauses(urn: str, is_csm: bool) -> int:
    """ Gives back how many main clauses are in the text. """
    # TODO: elliptical clauses are not counted
    aql: str = "deps"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    return len(node_ids)
def get(self):
    """ Returns matches from ANNIS for a given CTS URN and AQL. """
    # get request arguments
    args: dict = flask.request.args
    urn: str = args["urn"]
    aql: str = args["aql"]
    return NetworkService.make_json_response(
        CorpusService.find_matches(urn, aql, is_csm=True))
def how_many_gerunds(urn: str, is_csm: bool) -> int:
    """ Gives back the number of gerunds in the text. """
    aql: str = "feats=/.*VerbForm=Ger.*/"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # TODO: gerundives are not counted yet
    return len(node_ids)
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """Provides the raw text for a requested text passage."""
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=False)
    if not ar.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    ar.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, urn, False, ar.graph_data).to_dict()
    return NetworkService.make_json_response(ar.to_dict())
def get(self):
    """ Returns graph data for a given CTS URN. """
    # get request arguments
    args: Dict = flask.request.args
    cts_urn: str = args["urn"]
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=cts_urn, is_csm=True)
    if not ar.graph_data.nodes:
        abort(404)
    return NetworkService.make_json_response(ar.to_dict())
def get(frequency_upper_bound: int, query_urn: str, vocabulary: str) -> Response:
    """ Retrieves the sentence ID and matching degree for each sentence in the query text. """
    vc: VocabularyCorpus = VocabularyCorpus[vocabulary]
    vocabulary_set: Set[str] = FileService.get_vocabulary_set(vc, frequency_upper_bound)
    # punctuation should count as a match because we do not want it to be flagged as missing from the vocabulary
    for char in string.punctuation:
        vocabulary_set.add(char)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=query_urn, is_csm=False)
    sentences: List[Sentence] = check_vocabulary(ar.graph_data, vocabulary_set)
    return NetworkService.make_json_response([x.to_dict() for x in sentences])
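# The punctuation whitelisting above keeps tokens like "," or "." from
# lowering a sentence's matching degree. A self-contained illustration with a
# made-up vocabulary:
def _punctuation_whitelist_demo() -> None:
    vocabulary: Set[str] = {"amare", "amicus"}  # made-up vocabulary entries
    vocabulary.update(string.punctuation)
    assert "," in vocabulary  # punctuation now counts as a match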
def post(self):
    """Given the relevant corpus data, gives back search results as graph data."""
    args: dict = {}
    try:
        args = json.loads(flask.request.data.decode("utf-8"))
    except JSONDecodeError:
        abort(400)
    title: str = args["title"]
    annotations_or_urn: str = args["annotations"]
    aqls: List[str] = args["aqls"]
    exercise_type: ExerciseType = ExerciseType[args["exercise_type"]]
    search_phenomena: List[Phenomenon] = [
        Phenomenon().__getattribute__(x.upper()) for x in args["search_phenomena"]]
    conll: List[TokenList] = CorpusService.get_annotations_from_string(annotations_or_urn)
    ret_val: dict = CorpusService.process_corpus_data(
        title, conll, aqls, exercise_type, search_phenomena)
    # serialize the results to JSON
    return NetworkService.make_json_response(ret_val)
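# Hedged sketch of the JSON body this endpoint expects; the keys mirror the
# args accessed above, while all values are made-up placeholders:
EXAMPLE_SEARCH_REQUEST: dict = {
    "title": "example-corpus",  # placeholder title
    "annotations": "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1",  # placeholder URN or CoNLL string
    "aqls": ['upostag="VERB"'],
    "exercise_type": "matching",  # assumption: an ExerciseType member name
    "search_phenomena": ["upostag"],  # upper-cased to Phenomenon attributes above
}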
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """The GET method for the valid references REST API. It provides references for the desired text."""
    try:
        reff: List[str] = (CustomCorpusService.get_custom_corpus_reff(urn)
                           if CustomCorpusService.is_custom_corpus_urn(urn)
                           else CorpusService.get_standard_corpus_reff(urn))
    except ValueError:
        return connexion.problem(400, Config.ERROR_TITLE_BAD_REQUEST, Config.ERROR_MESSAGE_BAD_REQUEST)
    if not reff:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    return NetworkService.make_json_response(reff)
def get(urn: str):
    """ Returns results for a frequency query from ANNIS for a given CTS URN. """
    fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn, is_csm=True)
    # map the abbreviated values found by ANNIS to our own model
    skip_set: Set[Phenomenon] = {Phenomenon.LEMMA, Phenomenon.DEPENDENCY}
    for fi in fa:
        for i in range(len(fi.values)):
            if fi.phenomena[i] in skip_set:
                continue
            value_map: Dict[str, List[str]] = AnnotationService.phenomenon_map[fi.phenomena[i]]
            fi.values[i] = next((x for x in value_map if fi.values[i] in value_map[x]), None)
    return NetworkService.make_json_response([x.to_dict() for x in fa])
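# The next(...) expression above is a reverse lookup: given an abbreviated
# ANNIS value, it finds the phenomenon_map key whose value list contains it.
# A self-contained illustration with a made-up mapping:
def _reverse_lookup_demo() -> None:
    value_map: Dict[str, List[str]] = {"noun": ["NOUN", "NN"], "verb": ["VERB", "VB"]}  # made-up data
    abbreviated: str = "NN"
    mapped = next((x for x in value_map if abbreviated in value_map[x]), None)
    assert mapped == "noun"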
def post(vocabulary_data: dict):
    """ Indicates for each token of a corpus whether it is covered by a reference vocabulary. """
    vf: VocabularyForm = VocabularyForm.from_dict(vocabulary_data)
    vc: VocabularyCorpus = VocabularyCorpus[vf.vocabulary]
    vocabulary_set: Set[str] = FileService.get_vocabulary_set(vc, vf.frequency_upper_bound)
    # punctuation should count as a match because we do not want it to be flagged as missing from the vocabulary
    for char in string.punctuation:
        vocabulary_set.add(char)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=vf.query_urn, is_csm=False)
    for node in ar.graph_data.nodes:
        if not is_match(target_lemma=node.udep_lemma, vocabulary_set=vocabulary_set):
            node.is_oov = True
    ar = AnnisResponse(solutions=[], uri="", exercise_id="", graph_data=ar.graph_data)
    ar.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, vf.query_urn, False, ar.graph_data).to_dict()
    return NetworkService.make_json_response(ar.to_dict())
def get(eid: str) -> Union[Response, ConnexionResponse]:
    """Returns the exercise with the given ID, including its graph data and solutions."""
    exercise: TExercise = DatabaseService.query(Exercise, filter_by=dict(eid=eid), first=True)
    if not exercise:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=exercise.urn, is_csm=False)
    if not ar.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    exercise.last_access_time = datetime.utcnow().timestamp()
    DatabaseService.commit()
    exercise_type: ExerciseType = ExerciseType(exercise.exercise_type)
    ar.solutions = json.loads(exercise.solutions)
    ar.uri = NetworkService.get_exercise_uri(exercise)
    ar.exercise_id = exercise.eid
    ar.exercise_type = exercise_type.value
    return NetworkService.make_json_response(ar.to_dict())
def update_exercises(is_csm: bool) -> None:
    """Deletes outdated exercises and backfills missing text complexity data."""
    if DatabaseService.has_table(Config.DATABASE_TABLE_EXERCISE):
        exercises: List[Exercise] = DatabaseService.query(Exercise)
        now: datetime = datetime.utcnow()
        for exercise in exercises:
            exercise_datetime: datetime = datetime.fromtimestamp(exercise.last_access_time)
            # delete exercises that have not been accessed for a while, are no longer compatible,
            # or contain corrupted / empty data
            if (now - exercise_datetime).total_seconds() > Config.INTERVAL_EXERCISE_DELETE or \
                    not exercise.urn or not json.loads(exercise.solutions):
                db.session.delete(exercise)
                DatabaseService.commit()
            # manually add text complexity measures for old exercises
            elif not exercise.text_complexity:
                ar: AnnisResponse = CorpusService.get_corpus(exercise.urn, is_csm=is_csm)
                tc: TextComplexity = TextComplexityService.text_complexity(
                    TextComplexityMeasure.all.name, exercise.urn, is_csm, ar.graph_data)
                exercise.text_complexity = tc.all
                DatabaseService.commit()
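# The deletion condition above boils down to a simple age check; a minimal
# sketch of that predicate (hypothetical helper, interval value made up):
def _is_outdated(last_access_time: float, max_age_seconds: float = 30 * 24 * 3600) -> bool:
    """Returns True if the given timestamp is older than the maximum age."""
    return (datetime.utcnow() - datetime.fromtimestamp(last_access_time)).total_seconds() > max_age_seconds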
def get(measure: str, urn: str):
    """Gives back measures of text complexity for the given text."""
    ar: AnnisResponse = CorpusService.get_corpus(urn, is_csm=False)
    tc: TextComplexity = TextComplexityService.text_complexity(measure, urn, False, ar.graph_data)
    return NetworkService.make_json_response(tc.to_dict())