def test_parse_dict_value(self):
    # Each raw string is paired with the value parse_dict_value must return
    # for it: pipe-separated "key=value" pairs become an OrderedDict in input
    # order, while the empty string and a bare "_" both yield None.
    expectations = (
        ("key1=val1", OrderedDict([("key1", "val1")])),
        (
            "key1=val1|key2=val2",
            OrderedDict([("key1", "val1"), ("key2", "val2")]),
        ),
        ("", None),
        ("_", None),
    )

    for raw, expected in expectations:
        self.assertEqual(parse_dict_value(raw), expected)
def test_parse_dict_value(self):
    # Table of (raw input, expected result) pairs, checked in order.
    # The cases assert that:
    #   - a key without "=" gets the empty string as its value,
    #   - a repeated key keeps only the last value,
    #   - a "_" value becomes None, and the bare "_" and "_=val1" entries
    #     do not appear in the result,
    #   - the empty string and a lone "_" produce None instead of a Token.
    expectations = (
        ("key1", Token([("key1", "")])),
        ("key1=val1", Token([("key1", "val1")])),
        (
            "key1=val1|key2=val2",
            Token([("key1", "val1"), ("key2", "val2")]),
        ),
        (
            "key1=val1|key2|key3=val3",
            Token([("key1", "val1"), ("key2", ""), ("key3", "val3")]),
        ),
        ("key1=val1|key1=val2", Token([("key1", "val2")])),
        ("key1=_|_|_=val1", Token([("key1", None)])),
        ("", None),
        ("_", None),
    )

    for raw, expected in expectations:
        self.assertEqual(parse_dict_value(raw), expected)
def test_parse_CoNLL2009_1(self):
    # Parse an inline three-token sample using explicit column names and
    # custom parsers for three of the columns, then compare the third token
    # of the first sentence against the fully spelled-out expectation.
    column_names = (
        "id", "form", "lemma", "plemma", "pos", "ppos", "feats", "pfeats",
        "head", "phead", "deprel", "pdeprel", "fillpred", "pred", "apreds",
    )

    def pfeats_parser(line, i):
        return parse_dict_value(line[i])

    def phead_parser(line, i):
        return parse_int_value(line[i])

    def apreds_parser(line, i):
        # "apreds" is the last declared column, so it takes every cell from
        # its own position to the end of the row; "_" cells become None.
        return [None if cell == "_" else cell for cell in line[i:]]

    data = dedent("""\
        #\tid\tform\tlemma\tplemma\tpos\tppos\tfeats\tpfeats\thead\tphead\tdeprel\tpdeprel\tfillpred\tpred\tapreds
        1\tZ\tz\tz\tR\tR\tSubPOS=R|Cas=2\tSubPOS=R|Cas=2\t10\t10\tAuxP\tAuxP\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_
        2\ttéto\ttento\ttento\tP\tP\tSubPOS=D|Gen=F|Num=S|Cas=2\tSubPOS=D|Gen=F|Num=S|Cas=2\t3\t3\tAtr\tAtr\tY\ttento\t_\tRSTR\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_
        3\tknihy\tkniha\tkniha\tN\tN\tSubPOS=N|Gen=F|Num=S|Cas=2|Neg=A\tSubPOS=N|Gen=F|Num=S|Cas=2|Neg=A\t1\t1\tAdv\tAdv\tY\tkniha\t_\t_\t_\t_\t_\t_\t_\tDIR1\t_\t_\t_\t_\t_\t_\t_\t_
    """)

    sentences = parse(
        data,
        fields=column_names,
        field_parsers={
            "pfeats": pfeats_parser,
            "phead": phead_parser,
            "apreds": apreds_parser,
        },
    )
    third_token = sentences[0][2]

    # The feats and pfeats columns carry the same feature string in row 3.
    feature_pairs = [
        ("SubPOS", "N"), ("Gen", "F"), ("Num", "S"), ("Cas", "2"), ("Neg", "A"),
    ]
    # Row 3 has sixteen trailing cells; only the eighth one is filled.
    trailing_cells = [None] * 7 + ["DIR1"] + [None] * 8

    expected_token = OrderedDict([
        ("id", 3),
        ("form", "knihy"),
        ("lemma", "kniha"),
        ("plemma", "kniha"),
        ("pos", "N"),
        ("ppos", "N"),
        ("feats", OrderedDict(feature_pairs)),
        ("pfeats", OrderedDict(feature_pairs)),
        ("head", 1),
        ("phead", 1),
        ("deprel", "Adv"),
        ("pdeprel", "Adv"),
        ("fillpred", "Y"),
        ("pred", "kniha"),
        ("apreds", trailing_cells),
    ])

    self.assertEqual(third_token, expected_token)
def test_parse_CoNLL2009(self):
    # Parse the first CoNLL-2009 fixture with the default field parsers
    # extended by parsers for pfeats, phead and apreds, then compare the
    # third token of the first sentence against the expected mapping.
    def pfeats_parser(line, i):
        return parse_dict_value(line[i])

    def phead_parser(line, i):
        return parse_int_value(line[i])

    def apreds_parser(line, i):
        # "apreds" is the last declared column, so every cell from its
        # position to the end of the row is handed to parse_apreds.
        return TestParseCoNLL2009.parse_apreds(line[i:])

    # Work on a copy so the shared defaults are left untouched.
    field_parsers = DEFAULT_FIELD_PARSERS.copy()
    field_parsers.update(
        pfeats=pfeats_parser,
        phead=phead_parser,
        apreds=apreds_parser,
    )

    from tests.fixtures import TESTCASES_CONLL2009

    column_names = (
        "id", "form", "lemma", "plemma", "pos", "ppos", "feats", "pfeats",
        "head", "phead", "deprel", "pdeprel", "fillpred", "pred", "apreds",
    )
    sentences = parse(
        TESTCASES_CONLL2009[0],
        fields=column_names,
        field_parsers=field_parsers,
    )
    third_token = sentences[0][2]

    # The expected feats and pfeats values are identical for this token.
    feature_pairs = [
        ("SubPOS", "N"), ("Gen", "F"), ("Num", "S"), ("Cas", "2"), ("Neg", "A"),
    ]
    # Sixteen expected apreds entries; only the eighth one is filled.
    trailing_cells = [None] * 7 + ["DIR1"] + [None] * 8

    expected_token = OrderedDict([
        ("id", 3),
        ("form", "knihy"),
        ("lemma", "kniha"),
        ("plemma", "kniha"),
        ("pos", "N"),
        ("ppos", "N"),
        ("feats", OrderedDict(feature_pairs)),
        ("pfeats", OrderedDict(feature_pairs)),
        ("head", 1),
        ("phead", 1),
        ("deprel", "Adv"),
        ("pdeprel", "Adv"),
        ("fillpred", "Y"),
        ("pred", "kniha"),
        ("apreds", trailing_cells),
    ])

    self.assertEqual(third_token, expected_token)