def how_many_infinitives(urn: str, is_csm: bool) -> int:
    """ Gives back the number of infinitives in the text. """
    # infinitives that govern a subject in the accusative case
    aql: str = 'feats=/.*Inf.*/ ->dep[deprel=/(nsubj|nsubj:pass|csubj|csubj:pass)/] feats=/.*Acc.*/'
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # accusative tokens that govern an infinitive complement
    aql = 'feats=/.*Acc.*/ ->dep[deprel=/(xcomp|ccomp)/] feats=/.*Inf.*/'
    node_ids += CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # every match consists of two node IDs, so halve the count
    return round(len(node_ids) / 2)
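# The two queries above return two node IDs per hit (one per token in the
# dependency pair), hence the halving. The same idiom recurs in several
# counters below; a minimal standalone sketch (hypothetical helper, not part
# of the original service, assuming find_matches returns one ID per matched
# token):
def count_pair_matches(urn: str, aql: str, is_csm: bool) -> int:
    """Counts hits for a two-node AQL pattern."""
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # two node IDs per hit, so halve the count
    return round(len(node_ids) / 2)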
def how_many_ablativi_absoluti(urn: str, is_csm: bool) -> int:
    """ Gives back the number of ablativi absoluti in the text. """
    # a subject in the ablative case signals an ablativus absolutus
    aql: str = "tok ->dep[deprel=/(nsubj|nsubj:pass|csubj|csubj:pass)/] feats=/.*Abl.*/"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # every match consists of two node IDs, so halve the count
    return round(len(node_ids) / 2)
def how_many_participles(urn: str, is_csm: bool) -> int:
    """Gives back how many participles are in the text."""
    aql: str = "feats=/.*VerbForm=Part.*/"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    return len(node_ids)
def post(exercise_data: dict) -> Union[Response, ConnexionResponse]:
    """Creates a new exercise from the given data and returns it, including the relevant graph data."""
    ef: ExerciseForm = ExerciseForm.from_dict(exercise_data)
    ef.urn = ef.urn if ef.urn else ""
    exercise_type: ExerciseType = ExerciseType(ef.type)
    search_values_list: List[str] = json.loads(ef.search_values)
    aqls: List[str] = AnnotationService.map_search_values_to_aql(
        search_values_list=search_values_list, exercise_type=exercise_type)
    search_phenomena: List[Phenomenon] = [
        Phenomenon().__getattribute__(x.split("=")[0].upper()) for x in search_values_list]
    # if there is custom text instead of a URN, annotate it immediately
    conll_string_or_urn: str = ef.urn if CorpusService.is_urn(ef.urn) \
        else AnnotationService.get_udpipe(CorpusService.get_raw_text(ef.urn, False))
    try:
        # construct a graph from the CoNLL data
        response: dict = get_graph_data(
            title=ef.urn, conll_string_or_urn=conll_string_or_urn, aqls=aqls,
            exercise_type=exercise_type, search_phenomena=search_phenomena)
    except ValueError:
        return connexion.problem(500, Config.ERROR_TITLE_INTERNAL_SERVER_ERROR,
                                 Config.ERROR_MESSAGE_INTERNAL_SERVER_ERROR)
    solutions_dict_list: List[Dict] = response["solutions"]
    solutions: List[Solution] = [Solution.from_dict(x) for x in solutions_dict_list]
    ar: AnnisResponse = make_new_exercise(
        conll=response["conll"], correct_feedback=ef.correct_feedback, exercise_type=ef.type,
        general_feedback=ef.general_feedback, graph_data_raw=response["graph_data_raw"],
        incorrect_feedback=ef.incorrect_feedback, instructions=ef.instructions,
        language=ef.language, partially_correct_feedback=ef.partially_correct_feedback,
        search_values=ef.search_values, solutions=solutions,
        type_translation=ef.type_translation, urn=ef.urn,
        work_author=ef.work_author, work_title=ef.work_title)
    return NetworkService.make_json_response(ar.to_dict())
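# For orientation, a hedged sketch of the payload this handler consumes. The
# keys mirror the ExerciseForm attributes accessed above; all values are
# made-up placeholders, and the exact set of required fields depends on the
# ExerciseForm schema:
EXAMPLE_EXERCISE_DATA: dict = {
    "urn": "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1",  # placeholder URN
    "type": "markWords",  # assumption: a valid ExerciseType value
    "search_values": '["upostag=VERB"]',  # JSON-encoded list, parsed via json.loads above
    "instructions": "Mark all verbs in the passage.",
    "general_feedback": "",
    "correct_feedback": "",
    "incorrect_feedback": "",
    "partially_correct_feedback": "",
    "language": "de",
    "type_translation": "",
    "work_author": "",
    "work_title": "",
}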
def how_many_sub_clauses(urn: str, is_csm: bool) -> int:
    """ Gives back the number of subordinate clauses in the text. """
    aql: str = 'tok ->dep[deprel=/(acl|advcl|ccomp|xcomp)/] upostag="VERB"'
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # TODO: account for the degree of subordination; elliptical clauses are not counted
    # every match consists of two node IDs, so halve the count
    return round(len(node_ids) / 2)
def how_many_main_clauses(urn: str, is_csm: bool) -> int:
    """ Gives back how many main clauses are in the text. """
    # TODO: elliptical clauses are not counted
    aql: str = "deps"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    return len(node_ids)
def get(self):
    """ Returns matches from ANNIS for a given CTS URN and AQL. """
    # get request arguments
    args: dict = flask.request.args
    urn: str = args["urn"]
    aql: str = args["aql"]
    return NetworkService.make_json_response(
        CorpusService.find_matches(urn, aql, is_csm=True))
def how_many_gerunds(urn: str, is_csm: bool) -> int:
    """ Gives back the number of gerunds in the text. """
    aql: str = "feats=/.*VerbForm=Ger.*/"
    node_ids: List[str] = CorpusService.find_matches(urn, aql, is_csm=is_csm)
    # TODO: gerundives are not counted yet
    return len(node_ids)
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """Provides the raw text for a requested text passage."""
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=urn, is_csm=False)
    if not ar.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    ar.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, urn, False, ar.graph_data).to_dict()
    return NetworkService.make_json_response(ar.to_dict())
def get(self):
    """ Returns graph data for a given CTS URN. """
    # get request arguments
    args: Dict = flask.request.args
    cts_urn: str = args["urn"]
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=cts_urn, is_csm=True)
    if not ar.graph_data.nodes:
        abort(404)
    return NetworkService.make_json_response(ar.to_dict())
def get(frequency_upper_bound: int, query_urn: str, vocabulary: str) -> Response:
    """ Retrieves the sentence ID and matching degree for each sentence in the query text. """
    vc: VocabularyCorpus = VocabularyCorpus[vocabulary]
    vocabulary_set: Set[str] = FileService.get_vocabulary_set(vc, frequency_upper_bound)
    # punctuation should count as a match because we do not want it to be flagged as missing from the vocabulary
    for char in string.punctuation:
        vocabulary_set.add(char)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=query_urn, is_csm=False)
    sentences: List[Sentence] = check_vocabulary(ar.graph_data, vocabulary_set)
    return NetworkService.make_json_response([x.to_dict() for x in sentences])
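# The punctuation whitelisting above keeps tokens like "," or "." from
# lowering a sentence's matching degree. A self-contained illustration with a
# made-up vocabulary:
def _punctuation_whitelist_demo() -> None:
    vocabulary: Set[str] = {"amare", "amicus"}  # made-up vocabulary entries
    vocabulary.update(string.punctuation)
    assert "," in vocabulary  # punctuation now counts as a match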
def post(self):
    """Given the relevant corpus data, gives back search results as graph data."""
    args: dict = {}
    try:
        args = json.loads(flask.request.data.decode("utf-8"))
    except JSONDecodeError:
        abort(400)
    title: str = args["title"]
    annotations_or_urn: str = args["annotations"]
    aqls: List[str] = args["aqls"]
    exercise_type: ExerciseType = ExerciseType[args["exercise_type"]]
    search_phenomena: List[Phenomenon] = [
        Phenomenon().__getattribute__(x.upper()) for x in args["search_phenomena"]]
    conll: List[TokenList] = CorpusService.get_annotations_from_string(annotations_or_urn)
    ret_val: dict = CorpusService.process_corpus_data(
        title, conll, aqls, exercise_type, search_phenomena)
    # serialize the results to JSON
    return NetworkService.make_json_response(ret_val)
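# Hedged sketch of the JSON body this endpoint expects; the keys mirror the
# args accessed above, while all values are made-up placeholders:
EXAMPLE_SEARCH_REQUEST: dict = {
    "title": "example-corpus",  # placeholder title
    "annotations": "urn:cts:latinLit:phi0448.phi001.perseus-lat2:1.1.1",  # placeholder URN or CoNLL string
    "aqls": ['upostag="VERB"'],
    "exercise_type": "matching",  # assumption: an ExerciseType member name
    "search_phenomena": ["upostag"],  # upper-cased to Phenomenon attributes above
}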
def get(urn: str) -> Union[Response, ConnexionResponse]:
    """The GET method for the valid references REST API. It provides references for the desired text."""
    try:
        reff: List[str] = (CustomCorpusService.get_custom_corpus_reff(urn)
                           if CustomCorpusService.is_custom_corpus_urn(urn)
                           else CorpusService.get_standard_corpus_reff(urn))
    except ValueError:
        return connexion.problem(400, Config.ERROR_TITLE_BAD_REQUEST, Config.ERROR_MESSAGE_BAD_REQUEST)
    if not reff:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    return NetworkService.make_json_response(reff)
def get(urn: str):
    """ Returns results for a frequency query from ANNIS for a given CTS URN. """
    fa: List[FrequencyItem] = CorpusService.get_frequency_analysis(urn, is_csm=True)
    # map the abbreviated values found by ANNIS to our own model
    skip_set: Set[Phenomenon] = {Phenomenon.LEMMA, Phenomenon.DEPENDENCY}
    for fi in fa:
        for i in range(len(fi.values)):
            if fi.phenomena[i] in skip_set:
                continue
            value_map: Dict[str, List[str]] = AnnotationService.phenomenon_map[fi.phenomena[i]]
            fi.values[i] = next((x for x in value_map if fi.values[i] in value_map[x]), None)
    return NetworkService.make_json_response([x.to_dict() for x in fa])
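# The next(...) expression above is a reverse lookup: given an abbreviated
# ANNIS value, it finds the phenomenon_map key whose value list contains it.
# A self-contained illustration with a made-up mapping:
def _reverse_lookup_demo() -> None:
    value_map: Dict[str, List[str]] = {"noun": ["NOUN", "NN"], "verb": ["VERB", "VB"]}  # made-up data
    abbreviated: str = "NN"
    mapped = next((x for x in value_map if abbreviated in value_map[x]), None)
    assert mapped == "noun"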
def post(vocabulary_data: dict):
    """ Indicates for each token of a corpus whether it is covered by a reference vocabulary. """
    vf: VocabularyForm = VocabularyForm.from_dict(vocabulary_data)
    vc: VocabularyCorpus = VocabularyCorpus[vf.vocabulary]
    vocabulary_set: Set[str] = FileService.get_vocabulary_set(vc, vf.frequency_upper_bound)
    # punctuation should count as a match because we do not want it to be flagged as missing from the vocabulary
    for char in string.punctuation:
        vocabulary_set.add(char)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=vf.query_urn, is_csm=False)
    for node in ar.graph_data.nodes:
        if not is_match(target_lemma=node.udep_lemma, vocabulary_set=vocabulary_set):
            node.is_oov = True
    ar = AnnisResponse(solutions=[], uri="", exercise_id="", graph_data=ar.graph_data)
    ar.text_complexity = TextComplexityService.text_complexity(
        TextComplexityMeasure.all.name, vf.query_urn, False, ar.graph_data).to_dict()
    return NetworkService.make_json_response(ar.to_dict())
def get(eid: str) -> Union[Response, ConnexionResponse]:
    """Returns the exercise with the given ID, including its graph data and solutions."""
    exercise: TExercise = DatabaseService.query(Exercise, filter_by=dict(eid=eid), first=True)
    if not exercise:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_EXERCISE_NOT_FOUND)
    ar: AnnisResponse = CorpusService.get_corpus(cts_urn=exercise.urn, is_csm=False)
    if not ar.graph_data.nodes:
        return connexion.problem(404, Config.ERROR_TITLE_NOT_FOUND, Config.ERROR_MESSAGE_CORPUS_NOT_FOUND)
    exercise.last_access_time = datetime.utcnow().timestamp()
    DatabaseService.commit()
    exercise_type: ExerciseType = ExerciseType(exercise.exercise_type)
    ar.solutions = json.loads(exercise.solutions)
    ar.uri = NetworkService.get_exercise_uri(exercise)
    ar.exercise_id = exercise.eid
    ar.exercise_type = exercise_type.value
    return NetworkService.make_json_response(ar.to_dict())
def update_exercises(is_csm: bool) -> None:
    """Deletes outdated exercises and backfills missing text complexity data."""
    if DatabaseService.has_table(Config.DATABASE_TABLE_EXERCISE):
        exercises: List[Exercise] = DatabaseService.query(Exercise)
        now: datetime = datetime.utcnow()
        for exercise in exercises:
            exercise_datetime: datetime = datetime.fromtimestamp(exercise.last_access_time)
            # delete exercises that have not been accessed for a while, are no longer compatible,
            # or contain corrupted / empty data
            if (now - exercise_datetime).total_seconds() > Config.INTERVAL_EXERCISE_DELETE or \
                    not exercise.urn or not json.loads(exercise.solutions):
                db.session.delete(exercise)
                DatabaseService.commit()
            # manually add text complexity measures for old exercises
            elif not exercise.text_complexity:
                ar: AnnisResponse = CorpusService.get_corpus(exercise.urn, is_csm=is_csm)
                tc: TextComplexity = TextComplexityService.text_complexity(
                    TextComplexityMeasure.all.name, exercise.urn, is_csm, ar.graph_data)
                exercise.text_complexity = tc.all
                DatabaseService.commit()
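# The deletion condition above boils down to a simple age check; a minimal
# sketch of that predicate (hypothetical helper, interval value made up):
def _is_outdated(last_access_time: float, max_age_seconds: float = 30 * 24 * 3600) -> bool:
    """Returns True if the given timestamp is older than the maximum age."""
    return (datetime.utcnow() - datetime.fromtimestamp(last_access_time)).total_seconds() > max_age_seconds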
def get(measure: str, urn: str):
    """Gives back measures of text complexity for the given text."""
    ar: AnnisResponse = CorpusService.get_corpus(urn, is_csm=False)
    tc: TextComplexity = TextComplexityService.text_complexity(measure, urn, False, ar.graph_data)
    return NetworkService.make_json_response(tc.to_dict())