def test_similar_lemma_single_change(self):
    """ Ensure only similar features are fixed """
    self.load_fixtures()
    # Change token 1 so that only its lemma differs from the original value.
    token, change_record = WordToken.update(
        user_id=1, token_id=1, corpus_id=1,
        lemma="cil", morph="smn", POS="p")
    self.assertEqual(
        (token.lemma, token.morph, token.POS), ("cil", "smn", "p"),
        "All that was None was not changed")
    similar = WordToken.get_similar_to_record(change_record)
    self.assertEqual(
        sorted(tok.id for tok in similar), [4, 5],
        "4 and 5 are similar")
    updated = change_record.apply_changes_to(user_id=1, token_ids=[4, 5])
    # Both tokens get the new lemma; morph and POS are untouched because
    # they were not part of the recorded change.
    for token_id, expected_pos in ((4, "p"), (5, "n")):
        tok = self.tok_with_id(updated, token_id)
        self.assertEqual(tok.lemma, "cil", "Lemma was updated")
        self.assertEqual(tok.morph, "mmn",
                         "Morph stayed the same as it was not changed")
        self.assertEqual(tok.POS, expected_pos,
                         "POS stayed the same as it was not changed")
def setUp(self):
    super(TokensSearchThroughFieldsBase, self).setUp()
    self.addCorpus(corpus="wauchier")
    # One tuple per fixture token: (form, lemma, POS, morph)
    fixtures = [
        ("Testword*", "testword*", "TEST*pos", "test*morph"),
        ("TestwordFake", "testwordFake", "TESTposFake", "testmorphFake"),
        ("!TestwordFake", "!testwordFake", "!TESTposFake", "!testmorphFake"),
    ]
    for order_id, (form, lemma, pos, morph) in enumerate(fixtures, start=1):
        db.session.add(WordToken(
            corpus=TokensSearchThroughFieldsBase.CORPUS_ID,
            order_id=order_id,
            form=form,
            lemma=lemma,
            POS=pos,
            morph=morph,
            left_context="This is a left context",
            right_context="This is a left context",
        ))
    db.session.commit()
def test_add_batch_invalid(self):
    """Test adding a batch of tokens.

    Trying: one token violates length constraint
    Expecting: ValidationError
    """
    # 100 random letters — longer than the allowed form length.
    overlong_form = "".join(
        random.choice(string.ascii_letters) for _ in range(100))
    with self.assertRaises(ValidationError):
        WordToken.add_batch(0, [{"form": overlong_form}])
def search_value_api(corpus_id, allowed_type):
    """ Find allowed values

    :param corpus_id: Id of the Corpus
    :param allowed_type: Type of allowed value (lemma, morph, POS)
    """
    form = request.args.get("form", "")
    # Nothing to autocomplete on: answer with an empty list right away.
    if not form.strip():
        return jsonify([])
    corpus = Corpus.query.get_or_404(corpus_id)
    if not corpus.has_access(current_user):
        abort(403)
    matches = WordToken.get_like(
        filter_id=corpus_id,
        form=form,
        group_by=True,
        type_like=allowed_type,
        allowed_list=False,
    ).limit(AUTOCOMPLETE_LIMIT)
    return jsonify([
        format_api_like_reply(match, allowed_type)
        for match in matches
        if match is not None
    ])
def test_corpus_with_quotes(self):
    """ Test that a user can create a corpus and that this corpus has its data well recorded """
    # Every quote-like character we want the importer to survive.
    quote_chars = ['"', "'", "“", "”", "«", "»", "‘", "’", "„", "《", "》"]
    # Click register menu link
    self.driver.find_element_by_id("new_corpus_link").click()
    self.driver.implicitly_wait(15)
    # Fill in registration form
    name = "GUILLEMETS DE MONTMURAIL"
    self.driver.find_element_by_id("corpusName").send_keys(name)
    token_rows = "".join(
        "{0}\t{0}\tPONC\tMORPH=EMPTY\n".format(char) for char in quote_chars)
    self.writeMultiline(
        self.driver.find_element_by_id("tokens"),
        "tokens\tlemmas\tPOS\tmorph\n" + token_rows)
    self.driver.find_element_by_id("label_checkbox_create").click()
    self.driver.find_element_by_id("submit").click()
    self.driver.implicitly_wait(15)
    self.assertIn(
        url_for('main.corpus_get', corpus_id=1),
        self.driver.current_url,
        "Result page is the corpus new page"
    )
    self.assertEqual(
        db.session.query(Corpus).filter(Corpus.name == name).count(), 1,
        "There should be one well named corpus"
    )
    corpus = db.session.query(Corpus).filter(Corpus.name == name).first()
    tokens = db.session.query(WordToken).filter(WordToken.corpus == corpus.id)
    self.assertEqual(tokens.count(), 11, "There should be 11 tokens")
    # In the export format the double quote is escaped with a backslash;
    # every other quote character round-trips unchanged.
    expected = "token_id\tform\tlemma\tPOS\tmorph\n" + "".join(
        "{0}\t{1}\t{1}\tPONC\tMORPH=EMPTY\n".format(
            token_id, '\\"' if char == '"' else char)
        for token_id, char in enumerate(quote_chars, start=1))
    self.assertEqual(
        WordToken.to_input_format(tokens).replace("\r", ""),
        expected)
def test_to_input_format(self):
    """ Test that export to input format works correctly """
    self.addCorpus("floovant", tokens_up_to=3)
    # Export uses CRLF line endings; the trailing "" keeps the final \r\n.
    expected_rows = [
        "token_id\tform\tlemma\tPOS\tmorph",
        "1\tSOIGNORS\tseignor\t_\tNOMB.=p|GENRE=m|CAS=n",
        "2\tor\tor4\t_\tDEGRE=-",
        "3\tescoutez\tescouter\t_\tMODE=imp|PERS.=2|NOMB.=p",
        "",
    ]
    exported = WordToken.to_input_format(
        WordToken.query.filter(WordToken.corpus == 2))
    self.assertEqual(exported, "\r\n".join(expected_rows))
def test_add_batch_valid(self):
    """Test adding a batch of tokens.

    Trying: one token respecting length constraint
    Expecting: number of tokens is returned
    """
    # 64 random letters — within the allowed form length.
    valid_form = "".join(
        random.choice(string.ascii_letters) for _ in range(64))
    self.assertEqual(WordToken.add_batch(0, [{"form": valid_form}]), 1)
def test_update_batch_context(self):
    """Test updating left and right context.

    Trying: set right and left context to 4.
    """
    def random_form():
        # A 16-letter random token form.
        return "".join(
            random.choice(string.ascii_letters) for _ in range(16))

    form_list = [{"form": random_form()} for _ in range(200)]
    WordToken.add_batch(0, form_list)
    self.assertEqual(WordToken.update_batch_context(0, 4, 4), 200)
    token = WordToken.query.filter_by(corpus=0, order_id=15).first()
    left_context = token.left_context.split(" ")
    right_context = token.right_context.split(" ")
    self.assertEqual(len(left_context), 4)
    self.assertEqual(len(right_context), 4)
    # WordToken order_id starts at 1, form_list indices starts at 0:
    # token 15 sits at list index 14, so its left neighbours are 10..13
    # and its right neighbours are 15..18.
    for offset, list_index in ((0, 10), (3, 13)):
        self.assertEqual(left_context[offset], form_list[list_index]["form"])
    for offset, list_index in ((0, 15), (3, 18)):
        self.assertEqual(right_context[offset], form_list[list_index]["form"])
def search_api(control_list_id, allowed_type):
    """ Find allowed values

    :param control_list_id: Id of the Control List
    :param allowed_type: Type of allowed value (lemma, morph, POS)
    """
    # Guard against a missing or blank "form" query parameter: the sibling
    # endpoint search_value_api short-circuits this case, while here the raw
    # request.args.get("form") (possibly None) used to be forwarded straight
    # into the LIKE query. Answer with an empty list instead.
    form = request.args.get("form", "")
    if not form.strip():
        return jsonify([])
    return jsonify([
        format_api_like_reply(result, allowed_type)
        for result in WordToken.get_like(
            filter_id=control_list_id,
            form=form,
            group_by=True,
            type_like=allowed_type,
            allowed_list=True
        ).limit(AUTOCOMPLETE_LIMIT)
        if result is not None
    ])
def test_similar_lemma_double_change(self):
    """ Ensure only similar features are fixed """
    self.load_fixtures()
    # Change both lemma and POS of token 1.
    token, change_record = WordToken.update(
        user_id=1, token_id=1, corpus_id=1,
        lemma="cil", morph="smn", POS="u")
    self.assertEqual(
        (token.lemma, token.morph, token.POS), ("cil", "smn", "u"),
        "All that was different was changed")
    similar = WordToken.get_similar_to_record(change_record)
    self.assertEqual(
        sorted(tok.id for tok in similar), [3, 4, 5],
        "4 and 5 are similar; 3 has a common lemma with the new lemma created")
    tokens = change_record.apply_changes_to(user_id=1, token_ids=[3, 4, 5])
    # 3: lemma already "cil" (matches the new lemma), but its POS differs
    #    and needs to be updated.
    tok_3 = self.tok_with_id(tokens, 3)
    self.assertEqual(tok_3.lemma, "cil", "Lemma was already the same")
    self.assertEqual(tok_3.morph, "smn",
                     "Morph stayed the same as it was not changed")
    self.assertEqual(tok_3.POS, "u", "POS was changed")
    # 4: shares the old lemma -> lemma and POS updated; morph ignored even
    #    though it differs.
    tok_4 = self.tok_with_id(tokens, 4)
    self.assertEqual(tok_4.lemma, "cil", "Lemma was updated")
    self.assertEqual(tok_4.morph, "mmn",
                     "Morph stayed the same as it was not changed")
    self.assertEqual(tok_4.POS, "u", "POS was updated")
    # 5: shares the old lemma -> lemma updated; morph and POS ignored even
    #    though they differ.
    tok_5 = self.tok_with_id(tokens, 5)
    self.assertEqual(tok_5.lemma, "cil", "Lemma was updated")
    self.assertEqual(tok_5.morph, "mmn",
                     "Morph stayed the same as it was not changed")
    self.assertEqual(
        tok_5.POS, "n",
        "POS stayed the same as it was not common with original token")
    # Check number of change record ands IDS of lemmas
    crs = self.db.session.query(ChangeRecord).all()
    self.assertEqual(len(crs), 4,
                     "There has been 1 original change and 3 others")
    self.assertEqual(
        sorted(cr.word_token_id for cr in crs), [1, 3, 4, 5],
        "Changed record should be about the right records")
    cr5 = next(cr for cr in crs if cr.word_token_id == 5)
    self.assertEqual(
        (cr5.lemma, cr5.morph, cr5.POS,
         cr5.lemma_new, cr5.morph_new, cr5.POS_new),
        ("celui", "mmn", "n", "cil", "mmn", "n"),
        "Change record should be correct")
    self.assertEqual(cr5.changed, ["lemma"])
    cr4 = next(cr for cr in crs if cr.word_token_id == 4)
    self.assertEqual(
        (cr4.lemma, cr4.morph, cr4.POS,
         cr4.lemma_new, cr4.morph_new, cr4.POS_new),
        ("celui", "mmn", "p", "cil", "mmn", "u"),
        "Change record should be correct")
    self.assertCountEqual(cr4.changed, ["lemma", "POS"])
from app.models import ChangeRecord, WordToken, Corpus, ControlLists from .base import TestModels import copy SimilarityFixtures = [ ControlLists(id=1, name="CL Fixture"), Corpus(id=1, name="Fixtures !", control_lists_id=1), WordToken(corpus=1, form="Cil", lemma="celui", left_context="_", right_context="_", label_uniform="celui", morph="smn", POS="p"), # 1 WordToken(corpus=1, form="Cil", lemma="celle", left_context="_", right_context="_", label_uniform="celle", morph="smn", POS="n"), # 2 WordToken(corpus=1, form="Cil", lemma="cil", left_context="_", right_context="_", label_uniform="cil", morph="smn", POS="p"), # 3
Floovant = Corpus( name="Floovant", id=2, control_lists_id=2 ) FloovantColumns = [ Column(heading="Lemma", corpus_id=2), Column(heading="POS", corpus_id=2), Column(heading="Morph", corpus_id=2), Column(heading="Similar", corpus_id=2), ] FCL = ControlLists(id=2, name="Floovant") FloovantTokens = [ WordToken(corpus=Floovant.id, form="SOIGNORS", lemma="seignor", left_context="", right_context="or escoutez que", label_uniform="seignor", morph="NOMB.=p|GENRE=m|CAS=n"), WordToken(corpus=Floovant.id, form="or", lemma="or4", left_context="SOIGNORS", right_context="escoutez que Dés", label_uniform="or4", morph="DEGRE=-"), WordToken(corpus=Floovant.id, form="escoutez", lemma="escouter", left_context="SOIGNORS or", right_context="que Dés vos", label_uniform="escouter", morph="MODE=imp|PERS.=2|NOMB.=p"), WordToken(corpus=Floovant.id, form="que", lemma="que4", left_context="SOIGNORS or escoutez", right_context="Dés vos soit", label_uniform="que4", morph="_"), WordToken(corpus=Floovant.id, form="Dés", lemma="dieu", left_context="or escoutez que", right_context="vos soit amis", label_uniform="dieu", morph="NOMB.=s|GENRE=m|CAS=n"), WordToken(corpus=Floovant.id, form="vos", lemma="vos1", left_context="escoutez que Dés", right_context="soit amis III", label_uniform="vos1", morph="PERS.=2|NOMB.=p|GENRE=m|CAS=r"), WordToken(corpus=Floovant.id, form="soit", lemma="estre1", left_context="que Dés vos", right_context="amis III vers", label_uniform="estre1", morph="MODE=sub|TEMPS=pst|PERS.=3|NOMB.=s"), WordToken(corpus=Floovant.id, form="amis", lemma="ami", left_context="Dés vos soit", right_context="III vers de", label_uniform="ami", morph="NOMB.=s|GENRE=m|CAS=n"), WordToken(corpus=Floovant.id, form="III", lemma="trois1", left_context="vos soit amis",
from app.models import ControlLists control_list = ControlLists(id=3, name="Latin") corpus = Corpus( name="Priapees", id=3, control_lists_id=control_list.id, ) PriapeeColumns = [ Column(heading="Lemma", corpus_id=3), Column(heading="POS", corpus_id=3), Column(heading="Morph", corpus_id=3), Column(heading="Similar", corpus_id=3), ] tokens = [ WordToken(corpus=corpus.id, form="Carminis", lemma="carmen1", POS="NOMcom", left_context="Carminis incompti lusus lecture", right_context="procaces ,", label_uniform="carmen1", morph="Case=Gen|Numb=Sing"), WordToken(corpus=corpus.id, form="incompti", lemma="incomptus", POS="ADJqua", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens", label_uniform="incomptus", morph="Case=Gen|Numb=Sing|Deg=Pos"), WordToken(corpus=corpus.id, form="lusus", lemma="lusus", POS="NOMcom", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio", label_uniform="lusus", morph="Case=Gen|Numb=Sing"), WordToken(corpus=corpus.id, form="lecture", lemma="lego?", POS="VER", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio pone", label_uniform="lego?", morph="Case=Voc|Numb=Sing|Mood=Par|Voice=Act"), WordToken(corpus=corpus.id, form="procaces", lemma="procax", POS="ADJqua", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio pone supercilium", label_uniform="procax", morph="Case=Acc|Numb=Plur|Deg=Pos"), WordToken(corpus=corpus.id, form=",", lemma=",", POS="PUNC", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio pone supercilium .", label_uniform=",", morph="MORPH=empty"), WordToken(corpus=corpus.id, form="conueniens", lemma="conueniens", POS="ADJqua", left_context="incompti lusus lecture procaces", right_context=", conueniens Latio pone supercilium . 
non", label_uniform="conueniens", morph="Case=Nom|Numb=Sing|Deg=Pos"), WordToken(corpus=corpus.id, form="Latio", lemma="latio", POS="NOMcom", left_context="lusus lecture procaces ,", right_context="conueniens Latio pone supercilium . non soror", label_uniform="latio", morph="Case=Nom|Numb=Sing"), WordToken(corpus=corpus.id, form="pone", lemma="pono", POS="VER", left_context="lecture procaces , conueniens", right_context="Latio pone supercilium . non soror hoc", label_uniform="pono", morph="Numb=Sing|Mood=Imp|Tense=Pres|Voice=Act|Person=2"), WordToken(corpus=corpus.id, form="supercilium", lemma="supercilium", POS="NOMcom", left_context="procaces , conueniens Latio", right_context="pone supercilium . non soror hoc habitat", label_uniform="supercilium", morph="Case=Acc|Numb=Sing"), WordToken(corpus=corpus.id, form=".", lemma=".", POS="PUNC", left_context=", conueniens Latio pone", right_context="supercilium . non soror hoc habitat Phoebi", label_uniform=".", morph="MORPH=empty"), WordToken(corpus=corpus.id, form="non", lemma="non", POS="ADVneg", left_context="conueniens Latio pone supercilium", right_context=". non soror hoc habitat Phoebi ,", label_uniform="non", morph="MORPH=empty"), WordToken(corpus=corpus.id, form="soror", lemma="soror", POS="NOMcom", left_context="Latio pone supercilium .", right_context="non soror hoc habitat Phoebi , non", label_uniform="soror", morph="Case=Nom|Numb=Sing"), WordToken(corpus=corpus.id, form="hoc", lemma="hic1", POS="PROdem", left_context="pone supercilium . non", right_context="soror hoc habitat Phoebi , non uesta", label_uniform="hic1", morph="Case=Nom|Numb=Sing"), WordToken(corpus=corpus.id, form="habitat", lemma="habito", POS="VER", left_context="supercilium . non soror", right_context="hoc habitat Phoebi , non uesta sacello", label_uniform="habito", morph="Numb=Sing|Mood=Ind|Tense=Pres|Voice=Act|Person=3"), WordToken(corpus=corpus.id, form="Phoebi", lemma="phoebus", POS="NOMcom", left_context=". 
non soror hoc", right_context="habitat Phoebi , non uesta sacello ,", label_uniform="phoebus", morph="Case=Gen|Numb=Sing"),