コード例 #1
0
    def test_similar_lemma_single_change(self):
        """ Ensure only similar features are fixed """
        self.load_fixtures()
        # Change only the lemma of token 1 (morph and POS keep their values).
        token, change_record = WordToken.update(user_id=1,
                                                token_id=1,
                                                corpus_id=1,
                                                lemma="cil",
                                                morph="smn",
                                                POS="p")
        self.assertEqual((token.lemma, token.morph, token.POS),
                         ("cil", "smn", "p"),
                         "All that was None was not changed")
        similar = WordToken.get_similar_to_record(change_record)
        self.assertEqual([t.id for t in sorted(similar, key=lambda x: x.id)],
                         [4, 5], "4 and 5 are similar")

        tokens = change_record.apply_changes_to(user_id=1, token_ids=[4, 5])
        # Each similar token receives the new lemma only; morph and POS
        # must stay untouched by the propagation.
        expectations = {4: ("cil", "mmn", "p"), 5: ("cil", "mmn", "n")}
        for token_id, (exp_lemma, exp_morph, exp_pos) in expectations.items():
            tok = self.tok_with_id(tokens, token_id)
            self.assertEqual(tok.lemma, exp_lemma, "Lemma was updated")
            self.assertEqual(tok.morph, exp_morph,
                             "Morph stayed the same as it was not changed")
            self.assertEqual(tok.POS, exp_pos,
                             "POS stayed the same as it was not changed")
コード例 #2
0
    def setUp(self):
        """ Populate the test corpus with the three tokens the search tests use. """
        super(TokensSearchThroughFieldsBase, self).setUp()
        self.addCorpus(corpus="wauchier")
        # (form, lemma, POS, morph) for each fixture token, in order_id order.
        fixtures = [
            ("Testword*", "testword*", "TEST*pos", "test*morph"),
            ("TestwordFake", "testwordFake", "TESTposFake", "testmorphFake"),
            ("!TestwordFake", "!testwordFake", "!TESTposFake", "!testmorphFake"),
        ]
        # NOTE(review): right_context reuses the left-context sentence in the
        # original fixtures — presumably irrelevant to these tests; confirm.
        for order_id, (form, lemma, pos, morph) in enumerate(fixtures, start=1):
            db.session.add(WordToken(
                corpus=TokensSearchThroughFieldsBase.CORPUS_ID,
                order_id=order_id,
                form=form,
                lemma=lemma,
                POS=pos,
                morph=morph,
                left_context="This is a left context",
                right_context="This is a left context",
            ))
        db.session.commit()
コード例 #3
0
    def test_add_batch_invalid(self):
        """Test adding a batch of tokens.

        Trying: one token violates length constraint
        Expecting: ValidationError
        """
        # 100 random ASCII letters: longer than the allowed form length.
        form = "".join(random.choices(string.ascii_letters, k=100))
        with self.assertRaises(ValidationError):
            WordToken.add_batch(0, [{"form": form}])
コード例 #4
0
File: corpus.py — Project: hipster-philology/pyrrha
def search_value_api(corpus_id, allowed_type):
    """ Find allowed values

    :param corpus_id: Id of the Corpus
    :param allowed_type: Type of allowed value (lemma, morph, POS)
    """
    form = request.args.get("form", "")
    # A blank search string means there is nothing to autocomplete.
    if not form.strip():
        return jsonify([])
    corpus = Corpus.query.get_or_404(corpus_id)
    if not corpus.has_access(current_user):
        abort(403)
    matches = WordToken.get_like(
        filter_id=corpus_id,
        form=form,
        group_by=True,
        type_like=allowed_type,
        allowed_list=False
    ).limit(AUTOCOMPLETE_LIMIT)
    # Skip empty rows before formatting them for the autocomplete client.
    payload = [
        format_api_like_reply(match, allowed_type)
        for match in matches
        if match is not None
    ]
    return jsonify(payload)
コード例 #5
0
    def test_corpus_with_quotes(self):
        """
        Test that a user can create a corpus and that this corpus has its data well recorded
        """
        # Every quotation character the import/export must round-trip.
        quote_chars = ['"', "'", '“', '”', '«', '»', '‘', '’', '„', '《', '》']

        # Click register menu link
        self.driver.find_element_by_id("new_corpus_link").click()
        self.driver.implicitly_wait(15)

        # Fill in registration form
        name = "GUILLEMETS DE MONTMURAIL"
        self.driver.find_element_by_id("corpusName").send_keys(name)
        token_lines = "".join(
            "{0}\t{0}\tPONC\tMORPH=EMPTY\n".format(char)
            for char in quote_chars
        )
        self.writeMultiline(
            self.driver.find_element_by_id("tokens"),
            "tokens\tlemmas\tPOS\tmorph\n" + token_lines
        )
        self.driver.find_element_by_id("label_checkbox_create").click()
        self.driver.find_element_by_id("submit").click()
        self.driver.implicitly_wait(15)

        self.assertIn(
            url_for('main.corpus_get', corpus_id=1), self.driver.current_url,
            "Result page is the corpus new page"
        )

        self.assertEqual(
            db.session.query(Corpus).filter(Corpus.name == name).count(), 1,
            "There should be one well named corpus"
        )
        corpus = db.session.query(Corpus).filter(Corpus.name == name).first()
        tokens = db.session.query(WordToken).filter(WordToken.corpus == corpus.id)
        self.assertEqual(tokens.count(), 11, "There should be 11 tokens")
        # The export escapes the double quote (-> \"); every other quote
        # character must come back unchanged.
        expected_lines = "".join(
            "{0}\t{1}\t{1}\tPONC\tMORPH=EMPTY\n".format(
                index, '\\"' if char == '"' else char)
            for index, char in enumerate(quote_chars, start=1)
        )
        self.assertEqual(
            WordToken.to_input_format(tokens).replace("\r", ""),
            "token_id\tform\tlemma\tPOS\tmorph\n" + expected_lines
        )
コード例 #6
0
 def test_to_input_format(self):
     """ Test that export to input format works correctly """
     self.addCorpus("floovant", tokens_up_to=3)
     # Expected export: header row plus the first three Floovant tokens,
     # tab-separated with CRLF line endings.
     rows = [
         ("token_id", "form", "lemma", "POS", "morph"),
         ("1", "SOIGNORS", "seignor", "_", "NOMB.=p|GENRE=m|CAS=n"),
         ("2", "or", "or4", "_", "DEGRE=-"),
         ("3", "escoutez", "escouter", "_", "MODE=imp|PERS.=2|NOMB.=p"),
     ]
     expected = "".join("\t".join(row) + "\r\n" for row in rows)
     self.assertEqual(
         WordToken.to_input_format(
             WordToken.query.filter(WordToken.corpus == 2)),
         expected)
コード例 #7
0
    def test_add_batch_valid(self):
        """Test adding a batch of tokens.

        Trying: one token respecting length constraint
        Expecting: number of tokens is returned
        """
        # 64 random ASCII letters: within the allowed form length.
        form = "".join(random.choices(string.ascii_letters, k=64))
        self.assertEqual(WordToken.add_batch(0, [{"form": form}]), 1)
コード例 #8
0
    def test_update_batch_context(self):
        """Test updating left and right context.

        Trying: set right and left context to 4.
        """
        # 200 tokens, each a random 16-letter form.
        form_list = [
            {"form": "".join(random.choices(string.ascii_letters, k=16))}
            for _ in range(200)
        ]
        WordToken.add_batch(0, form_list)
        self.assertEqual(WordToken.update_batch_context(0, 4, 4), 200)
        token = WordToken.query.filter_by(corpus=0, order_id=15).first()
        left_context = token.left_context.split(" ")
        right_context = token.right_context.split(" ")
        self.assertEqual(len(left_context), 4)
        self.assertEqual(len(right_context), 4)
        # WordToken order_id starts at 1, form_list indices starts at 0
        for context, indices in ((left_context, (10, 13)),
                                 (right_context, (15, 18))):
            first, last = indices
            self.assertEqual(context[0], form_list[first]["form"])
            self.assertEqual(context[3], form_list[last]["form"])
コード例 #9
0
def search_api(control_list_id, allowed_type):
    """ Find allowed values

    :param control_list_id: Id of the Control List
    :param allowed_type: Type of allowed value (lemma, morph, POS)
    """
    # Guard against a missing or blank "form" query parameter: the previous
    # code forwarded None straight into WordToken.get_like. This mirrors the
    # corpus-level search_value_api, which short-circuits to an empty list.
    form = request.args.get("form", "")
    if not form.strip():
        return jsonify([])
    return jsonify([
        format_api_like_reply(result, allowed_type) for result in
        WordToken.get_like(filter_id=control_list_id,
                           form=form,
                           group_by=True,
                           type_like=allowed_type,
                           allowed_list=True).limit(AUTOCOMPLETE_LIMIT)
        if result is not None
    ])
コード例 #10
0
    def test_similar_lemma_double_change(self):
        """ Ensure only similar features are fixed.

        Token 1 receives both a new lemma ("cil") and a new POS ("u");
        the change is then propagated to the tokens reported as similar,
        and the resulting ChangeRecords are checked one by one.
        """
        self.load_fixtures()
        token, change_record = WordToken.update(user_id=1,
                                                token_id=1,
                                                corpus_id=1,
                                                lemma="cil",
                                                morph="smn",
                                                POS="u")
        self.assertEqual((token.lemma, token.morph, token.POS),
                         ("cil", "smn", "u"),
                         "All that was different was changed")
        similar = WordToken.get_similar_to_record(change_record)
        self.assertEqual([t.id for t in sorted(similar, key=lambda x: x.id)], [
            3, 4, 5
        ], "4 and 5 are similar; 3 has a common lemma with the new lemma created"
                         )

        tokens = change_record.apply_changes_to(user_id=1, token_ids=[3, 4, 5])
        # Token 3: already carries the new lemma "cil", but its POS differs
        # and must be updated.
        tok_3 = self.tok_with_id(tokens, 3)
        self.assertEqual(tok_3.lemma, "cil", "Lemma was already the same")
        self.assertEqual(tok_3.morph, "smn",
                         "Morph stayed the same as it was not changed")
        self.assertEqual(tok_3.POS, "u", "POS was changed")

        # Token 4: shares the old lemma; lemma and POS are updated, morph is
        # ignored even though it differs.
        tok_4 = self.tok_with_id(tokens, 4)
        self.assertEqual(tok_4.lemma, "cil", "Lemma was updated")
        self.assertEqual(tok_4.morph, "mmn",
                         "Morph stayed the same as it was not changed")
        self.assertEqual(tok_4.POS, "u", "POS was updated")

        # Token 5: shares the old lemma; only the lemma is updated — morph
        # and POS are ignored even though they differ.
        tok_5 = self.tok_with_id(tokens, 5)
        self.assertEqual(tok_5.lemma, "cil", "Lemma was updated")
        self.assertEqual(tok_5.morph, "mmn",
                         "Morph stayed the same as it was not changed")
        self.assertEqual(
            tok_5.POS, "n",
            "POS stayed the same as it was not common with original token")

        # Check the number of ChangeRecords and the token ids they refer to.
        crs = self.db.session.query(ChangeRecord).all()
        self.assertEqual(len(crs), 4,
                         "There has been 1 original change and 3 others")
        self.assertEqual([
            cr.word_token_id
            for cr in sorted(crs, key=lambda t: t.word_token_id)
        ], [1, 3, 4, 5], "Changed record should be about the right records")
        cr5 = [cr for cr in crs if cr.word_token_id == 5][0]
        self.assertEqual((cr5.lemma, cr5.morph, cr5.POS, cr5.lemma_new,
                          cr5.morph_new, cr5.POS_new),
                         ("celui", "mmn", "n", "cil", "mmn", "n"),
                         "Change record should be correct")
        self.assertEqual(cr5.changed, ["lemma"])

        cr4 = [cr for cr in crs if cr.word_token_id == 4][0]
        self.assertEqual((cr4.lemma, cr4.morph, cr4.POS, cr4.lemma_new,
                          cr4.morph_new, cr4.POS_new),
                         ("celui", "mmn", "p", "cil", "mmn", "u"),
                         "Change record should be correct")
        self.assertCountEqual(cr4.changed, ["lemma", "POS"])
コード例 #11
0
from app.models import ChangeRecord, WordToken, Corpus, ControlLists
from .base import TestModels
import copy

SimilarityFixtures = [
    ControlLists(id=1, name="CL Fixture"),
    Corpus(id=1, name="Fixtures !", control_lists_id=1),
    WordToken(corpus=1,
              form="Cil",
              lemma="celui",
              left_context="_",
              right_context="_",
              label_uniform="celui",
              morph="smn",
              POS="p"),  # 1
    WordToken(corpus=1,
              form="Cil",
              lemma="celle",
              left_context="_",
              right_context="_",
              label_uniform="celle",
              morph="smn",
              POS="n"),  # 2
    WordToken(corpus=1,
              form="Cil",
              lemma="cil",
              left_context="_",
              right_context="_",
              label_uniform="cil",
              morph="smn",
              POS="p"),  # 3
コード例 #12
0
File: floovant.py — Project: hipster-philology/pyrrha

# Fixture corpus "Floovant" (id 2), bound to control list id 2 below.
Floovant = Corpus(
    name="Floovant",
    id=2,
    control_lists_id=2
)
# One Column fixture per annotation field of corpus 2.
FloovantColumns = [
    Column(heading="Lemma", corpus_id=2),
    Column(heading="POS", corpus_id=2),
    Column(heading="Morph", corpus_id=2),
    Column(heading="Similar", corpus_id=2),
]
# Control list matching Floovant.control_lists_id.
FCL = ControlLists(id=2, name="Floovant")
FloovantTokens = [
    WordToken(corpus=Floovant.id, form="SOIGNORS", lemma="seignor", left_context="", right_context="or escoutez que",
              label_uniform="seignor", morph="NOMB.=p|GENRE=m|CAS=n"),
    WordToken(corpus=Floovant.id, form="or", lemma="or4", left_context="SOIGNORS", right_context="escoutez que Dés",
              label_uniform="or4", morph="DEGRE=-"),
    WordToken(corpus=Floovant.id, form="escoutez", lemma="escouter", left_context="SOIGNORS or",
              right_context="que Dés vos", label_uniform="escouter", morph="MODE=imp|PERS.=2|NOMB.=p"),
    WordToken(corpus=Floovant.id, form="que", lemma="que4", left_context="SOIGNORS or escoutez",
              right_context="Dés vos soit", label_uniform="que4", morph="_"),
    WordToken(corpus=Floovant.id, form="Dés", lemma="dieu", left_context="or escoutez que",
              right_context="vos soit amis", label_uniform="dieu", morph="NOMB.=s|GENRE=m|CAS=n"),
    WordToken(corpus=Floovant.id, form="vos", lemma="vos1", left_context="escoutez que Dés",
              right_context="soit amis III", label_uniform="vos1", morph="PERS.=2|NOMB.=p|GENRE=m|CAS=r"),
    WordToken(corpus=Floovant.id, form="soit", lemma="estre1", left_context="que Dés vos",
              right_context="amis III vers", label_uniform="estre1", morph="MODE=sub|TEMPS=pst|PERS.=3|NOMB.=s"),
    WordToken(corpus=Floovant.id, form="amis", lemma="ami", left_context="Dés vos soit", right_context="III vers de",
              label_uniform="ami", morph="NOMB.=s|GENRE=m|CAS=n"),
    WordToken(corpus=Floovant.id, form="III", lemma="trois1", left_context="vos soit amis",
コード例 #13
0
from app.models import ControlLists

# Control list and corpus fixtures for the Latin "Priapees" test corpus (id 3).
control_list = ControlLists(id=3, name="Latin")
corpus = Corpus(
    name="Priapees",
    id=3,
    control_lists_id=control_list.id,
)
# One Column fixture per annotation field of corpus 3.
PriapeeColumns = [
    Column(heading="Lemma", corpus_id=3),
    Column(heading="POS", corpus_id=3),
    Column(heading="Morph", corpus_id=3),
    Column(heading="Similar", corpus_id=3),
]
tokens = [
    WordToken(corpus=corpus.id, form="Carminis", lemma="carmen1", POS="NOMcom", left_context="Carminis incompti lusus lecture", right_context="procaces ,", label_uniform="carmen1", morph="Case=Gen|Numb=Sing"),
    WordToken(corpus=corpus.id, form="incompti", lemma="incomptus", POS="ADJqua", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens", label_uniform="incomptus", morph="Case=Gen|Numb=Sing|Deg=Pos"),
    WordToken(corpus=corpus.id, form="lusus", lemma="lusus", POS="NOMcom", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio", label_uniform="lusus", morph="Case=Gen|Numb=Sing"),
    WordToken(corpus=corpus.id, form="lecture", lemma="lego?", POS="VER", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio pone", label_uniform="lego?", morph="Case=Voc|Numb=Sing|Mood=Par|Voice=Act"),
    WordToken(corpus=corpus.id, form="procaces", lemma="procax", POS="ADJqua", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio pone supercilium", label_uniform="procax", morph="Case=Acc|Numb=Plur|Deg=Pos"),
    WordToken(corpus=corpus.id, form=",", lemma=",", POS="PUNC", left_context="Carminis incompti lusus lecture", right_context="procaces , conueniens Latio pone supercilium .", label_uniform=",", morph="MORPH=empty"),
    WordToken(corpus=corpus.id, form="conueniens", lemma="conueniens", POS="ADJqua", left_context="incompti lusus lecture procaces", right_context=", conueniens Latio pone supercilium . non", label_uniform="conueniens", morph="Case=Nom|Numb=Sing|Deg=Pos"),
    WordToken(corpus=corpus.id, form="Latio", lemma="latio", POS="NOMcom", left_context="lusus lecture procaces ,", right_context="conueniens Latio pone supercilium . non soror", label_uniform="latio", morph="Case=Nom|Numb=Sing"),
    WordToken(corpus=corpus.id, form="pone", lemma="pono", POS="VER", left_context="lecture procaces , conueniens", right_context="Latio pone supercilium . non soror hoc", label_uniform="pono", morph="Numb=Sing|Mood=Imp|Tense=Pres|Voice=Act|Person=2"),
    WordToken(corpus=corpus.id, form="supercilium", lemma="supercilium", POS="NOMcom", left_context="procaces , conueniens Latio", right_context="pone supercilium . non soror hoc habitat", label_uniform="supercilium", morph="Case=Acc|Numb=Sing"),
    WordToken(corpus=corpus.id, form=".", lemma=".", POS="PUNC", left_context=", conueniens Latio pone", right_context="supercilium . non soror hoc habitat Phoebi", label_uniform=".", morph="MORPH=empty"),
    WordToken(corpus=corpus.id, form="non", lemma="non", POS="ADVneg", left_context="conueniens Latio pone supercilium", right_context=". non soror hoc habitat Phoebi ,", label_uniform="non", morph="MORPH=empty"),
    WordToken(corpus=corpus.id, form="soror", lemma="soror", POS="NOMcom", left_context="Latio pone supercilium .", right_context="non soror hoc habitat Phoebi , non", label_uniform="soror", morph="Case=Nom|Numb=Sing"),
    WordToken(corpus=corpus.id, form="hoc", lemma="hic1", POS="PROdem", left_context="pone supercilium . non", right_context="soror hoc habitat Phoebi , non uesta", label_uniform="hic1", morph="Case=Nom|Numb=Sing"),
    WordToken(corpus=corpus.id, form="habitat", lemma="habito", POS="VER", left_context="supercilium . non soror", right_context="hoc habitat Phoebi , non uesta sacello", label_uniform="habito", morph="Numb=Sing|Mood=Ind|Tense=Pres|Voice=Act|Person=3"),
    WordToken(corpus=corpus.id, form="Phoebi", lemma="phoebus", POS="NOMcom", left_context=". non soror hoc", right_context="habitat Phoebi , non uesta sacello ,", label_uniform="phoebus", morph="Case=Gen|Numb=Sing"),