def rewrite_MOOD(word):
    """Translate the Omorfi MOOD feature of ``word`` into a UD Mood feature.

    Returns a one-element list holding a ``('Mood', value)`` pair, or an
    empty list when the MOOD value is not one of the handled ones.
    """
    mood = word.feat_map()['MOOD']

    # Moods that have a direct UD counterpart.
    direct = {
        'Ind': 'Ind',    # indicative
        'Imprt': 'Imp',  # imperative
        'Cond': 'Cnd',   # conditional
        'Pot': 'Pot',    # potential
    }
    if mood in direct:
        return [('Mood', direct[mood])]

    # Omorfi also defines two archaic moods which are extremely rare
    # (e.g. no occurrences in TDT). Should they ever appear, they are
    # approximated and a warning is issued.
    if mood == 'Opt':
        # optative (e.g. "kävellös"), approximated as imperative
        warn('mapping optative mood to imperative')
        return [('Mood', 'Imp')]
    if mood == 'Eve':
        # eventive (e.g. "kävelleisin"), approximated as potential
        warn('mapping eventive mood to potential')
        return [('Mood', 'Pot')]

    # Any other value is dropped without complaint.
    return []
def setUp(self):
    """Start the server under test in a subprocess on a free port.

    Ports 8800-8899 are tried in order. For each port the server script
    is launched and polled for up to 100 * 0.1 seconds until it accepts
    connections on 127.0.0.1. On success the method returns with
    ``self.port`` and ``self.p`` set.

    Exit codes of a server that died early are interpreted as follows:
    128 marks an import error (the test is flagged as skipped), 3 marks
    a port that is already in use (the next port is tried).

    Raises:
        AssertionError: if the server is still running but never
            accepted a connection, or if no port in the range worked.
    """
    if self.skip:
        return

    # Find a free port
    for port in range(8800, 8900):
        self.port = port
        # Start servertest.py in a subprocess
        cmd = [sys.executable, serverscript, self.server, str(port)]
        cmd += sys.argv[1:]  # pass cmdline arguments to subprocesses
        self.p = Popen(cmd, stdout=PIPE, stderr=PIPE)

        # Wait for the socket to accept connections
        for _ in range(100):
            time.sleep(0.1)
            # Accepts connections?
            if ping('127.0.0.1', port):
                return
            # Server died for some reason...
            if self.p.poll() is not None:
                break

        rv = self.p.poll()
        if rv is None:
            raise AssertionError("Server took to long to start up.")
        # Exit codes are compared by value; identity comparison of ints
        # only works by accident of small-integer caching.
        if rv == 128:  # Import error
            tools.warn("Skipping %r test (ImportError)." % self.server)
            self.skip = True
            return
        if rv == 3:  # Port in use
            continue
        # NOTE(review): any other exit code also falls through to the
        # next port -- confirm that this is intended.

    raise AssertionError("Could not find a free port to test server.")
def add_VerbForm(word):
    """Derive the UD VerbForm feature for a verb from its Omorfi features.

    Returns ``[('VerbForm', 'Inf')]`` for infinitives 1-3,
    ``[('VerbForm', 'Part')]`` for participles and
    ``[('VerbForm', 'Fin')]`` for verbs carrying MOOD or PRS. Non-verbs
    and verbs for which no form can be determined yield an empty list.
    (see https://github.com/TurkuNLP/UniversalFinnish/issues/28)
    """
    feats = word.feat_map()

    if word.cpostag not in VERB_TAGS:
        return []

    if 'INF' in feats:
        # infinitive: must not be combined with participle, person or
        # mood marking
        assert 'PCP' not in feats, 'INF and PCP'
        assert 'PRS' not in feats, 'INF and PRS'
        assert 'MOOD' not in feats, 'INF and MOOD'
        inf = feats['INF']
        if inf not in ('Inf1', 'Inf2', 'Inf3'):
            warn(u'unexpected INF value ' + inf)
            return []
        return [('VerbForm', 'Inf')]

    if 'PCP' in feats:
        # participle: same exclusivity checks as above
        assert 'INF' not in feats, 'PCP and INF'
        assert 'PRS' not in feats, 'PCP and PRS'
        assert 'MOOD' not in feats, 'PCP and MOOD'
        return [('VerbForm', 'Part')]

    # Should be finite, check for some marker. Any non-infinitive,
    # non-participle verb counts as finite if it has either MOOD or PRS.
    # (https://github.com/TurkuNLP/UniversalFinnish/issues/28)
    has_finite_marker = 'MOOD' in feats or 'PRS' in feats
    if not has_finite_marker:
        warn(u'failed to assign VerbForm to ' + unicode(word))
        return []
    return [('VerbForm', 'Fin')]
def rewrite_PRON_SUBCAT(word):
    """Map the Omorfi SUBCAT value of a pronoun to UD features.

    Returns a one-element list of ``(feature, value)`` pairs, or an empty
    list for SUBCAT values that have no mapping.
    """
    subcat = word.feat_map()['SUBCAT']

    # Subcategories with a direct UD PronType counterpart.
    pron_types = {
        'Dem': 'Dem',     # demonstrative
        'Pers': 'Prs',    # personal
        'Rel': 'Rel',     # relative
        'Indef': 'Ind',   # indefinite
        'Interr': 'Int',  # interrogative
        'Recipr': 'Rcp',  # reciprocal
    }
    if subcat in pron_types:
        return [('PronType', pron_types[subcat])]

    if subcat == 'Refl':  # reflexive
        # NOTE: UD defines Reflexive as a separate feature from PronType
        # (http://universaldependencies.github.io/docs/u/feat/Reflex.html)
        # TODO: consider adding PronType also?
        return [('Reflex', 'Yes')]

    if subcat == 'Qnt':
        # NOTE: UD does not define "quantifier" as a pronoun type, so
        # these are (tentatively) mapped to the closest corresponding
        # subcategory, indefinite pronouns.
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/37
        warn('mapping PRON SUBCAT ' + subcat + ' to Ind')
        return [('PronType', 'Ind')]

    return []
def test_json(self):
    """A dict returned from a route is served as JSON.

    Checks both the body (the JSON dump of the dict) and the
    Content-Type header. If the checks raise ImportError the test is
    skipped with a warning instead of failing.
    """
    @self.app.route('/')
    def handler():
        return {'a': 1}

    try:
        expected_body = bottle.json_dumps({'a': 1})
        self.assertBody(expected_body)
        self.assertHeader('Content-Type', 'application/json')
    except ImportError:
        warn("Skipping JSON tests.")
def rewrite_PRON_SUBCAT(word):
    """Map the Omorfi SUBCAT value of a pronoun to UD features.

    Returns a list with a single ``(feature, value)`` pair, or an empty
    list when the SUBCAT value has no mapping.
    """
    value = word.feat_map()["SUBCAT"]

    # SUBCAT value -> UD (feature, value) pair.
    features = {
        "Dem": ("PronType", "Dem"),     # demonstrative
        "Pers": ("PronType", "Prs"),    # personal
        "Rel": ("PronType", "Rel"),     # relative
        "Indef": ("PronType", "Ind"),   # indefinite
        "Interr": ("PronType", "Int"),  # interrogative
        "Recipr": ("PronType", "Rcp"),  # reciprocal
        # NOTE: UD defines Reflexive as a separate feature from PronType
        # (http://universaldependencies.github.io/docs/u/feat/Reflex.html)
        # TODO: consider adding PronType also?
        "Refl": ("Reflex", "Yes"),      # reflexive
        # NOTE: UD does not define "quantifier" as a pronoun type, so
        # these are (tentatively) mapped to the closest corresponding
        # subcategory, indefinite pronouns.
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/37
        "Qnt": ("PronType", "Ind"),
    }

    feature = features.get(value)
    if feature is None:
        return []
    if value == "Qnt":
        # The quantifier mapping is an approximation, so report it.
        warn("mapping PRON SUBCAT " + value + " to Ind")
    return [feature]
def rewrite_SUBCAT(word):
    """Dispatch SUBCAT rewriting to the handler registered for the word's tag.

    ``subcat_rewrite_func`` is scanned in order; the handler paired with
    the first tag set containing ``word.cpostag`` is called and its
    result returned. When no tag set matches, a warning is issued and an
    empty list is returned.
    """
    handler = next(
        (func for tagset, func in subcat_rewrite_func
         if word.cpostag in tagset),
        None,
    )
    if handler is None:
        warn(word.cpostag + " with SUBCAT")
        return []
    return handler(word)
def rewrite_SUBCAT(word):
    """Rewrite SUBCAT using the handler that matches the word's cpostag.

    Walks ``subcat_rewrite_func`` in order and delegates to the handler
    of the first tag set that contains ``word.cpostag``. Without a match
    a warning is issued and an empty list is returned.
    """
    for tagset, func in subcat_rewrite_func:
        if word.cpostag in tagset:
            break
    else:
        # No tag set claimed this cpostag.
        warn(word.cpostag + ' with SUBCAT')
        return []
    return func(word)
def rewrite_MOOD(word):
    """Translate the Omorfi MOOD feature of ``word`` into a UD Mood feature.

    Returns ``[("Mood", value)]`` for handled moods and an empty list
    for anything else.
    """
    mood = word.feat_map()["MOOD"]

    # MOOD value -> (UD Mood value, warning to emit or None).
    #
    # Omorfi defines the archaic optative (e.g. "kävellös") and eventive
    # (e.g. "kävelleisin") moods, which are extremely rare (e.g. no
    # occurrences in TDT). Should they ever appear, they are approximated
    # as imperative and potential respectively.
    table = {
        "Ind": ("Ind", None),    # indicative
        "Imprt": ("Imp", None),  # imperative
        "Cond": ("Cnd", None),   # conditional
        "Pot": ("Pot", None),    # potential
        "Opt": ("Imp", "mapping optative mood to imperative"),
        "Eve": ("Pot", "mapping eventive mood to potential"),
    }

    if mood not in table:
        return []
    ud_value, note = table[mood]
    if note is not None:
        warn(note)
    return [("Mood", ud_value)]
def test_json_HTTPResponse(self):
    """A dict wrapped in an HTTPResponse (status 500) is served as JSON.

    Checks the body against the JSON dump of the dict and the
    Content-Type header. If the checks raise ImportError the test is
    skipped with a warning instead of failing.
    """
    @self.app.route('/')
    def handler():
        return py3web.HTTPResponse({'a': 1}, 500)

    try:
        expected_body = py3web.json_dumps({'a': 1})
        self.assertBody(expected_body)
        self.assertHeader('Content-Type', 'application/json')
    except ImportError:
        warn("Skipping JSON tests.")
def rewrite_A(word):
    """Rewrite the POS of a word analysed as "A" (adjective).

    Returns the string ``'ADJ'``, or the tuple ``('PRON', 'Pron')`` for
    the lemmas that are reassigned to pronouns.
    """
    # Particular words given "A" analyses are assigned "Pron" by lemma.
    # (see https://github.com/TurkuNLP/UniversalFinnish/issues/66)
    is_pronoun_lemma = word.lemma in ('muu', 'sama')
    if not is_pronoun_lemma:
        return 'ADJ'

    warn('assigning %s to Pron/PRON' % word.form)
    return ('PRON', 'Pron')
def rewrite_TENSE(word):
    """Map the Omorfi TENSE feature to the UD Tense feature.

    A warning is issued when the word is not a verb, but the mapping is
    still attempted. Returns ``[('Tense', value)]`` for Prs/Prt and an
    empty list for any other TENSE value.
    """
    if word.cpostag not in VERB_TAGS:
        warn(word.cpostag + ' with TENSE')

    tense = word.feat_map()['TENSE']
    ud_tense = {'Prs': 'Pres', 'Prt': 'Past'}.get(tense)
    if ud_tense is None:
        # Unknown values are dropped silently.
        return []
    return [('Tense', ud_tense)]
def rewrite_VOICE(word):
    """Map the Omorfi VOICE feature to the UD Voice feature.

    A warning is issued when the word is not a verb, but the mapping is
    still attempted. Returns ``[('Voice', value)]`` for Act/Pass and an
    empty list for any other VOICE value.
    """
    if word.cpostag not in VERB_TAGS:
        warn(word.cpostag + ' with VOICE')

    voice = word.feat_map()['VOICE']
    ud_voice = {'Act': 'Act', 'Pass': 'Pass'}.get(voice)
    if ud_voice is None:
        # Unknown values are dropped silently.
        return []
    return [('Voice', ud_voice)]
def rewrite_VOICE(word):
    """Map the Omorfi VOICE feature to the UD Voice feature.

    Non-verbs trigger a warning but are still mapped. Only the values
    Act and Pass are carried over; anything else yields an empty list.
    """
    if word.cpostag not in VERB_TAGS:
        warn(word.cpostag + " with VOICE")

    value = word.feat_map()["VOICE"]
    for known in ("Act", "Pass"):
        if value == known:
            return [("Voice", known)]
    # Unknown values are dropped silently.
    return []
def rewrite_TENSE(word):
    """Map the Omorfi TENSE feature to the UD Tense feature.

    Non-verbs trigger a warning but are still mapped. Prs becomes Pres
    and Prt becomes Past; any other value yields an empty list.
    """
    if word.cpostag not in VERB_TAGS:
        warn(word.cpostag + " with TENSE")

    value = word.feat_map()["TENSE"]
    for omorfi_tense, ud_tense in (("Prs", "Pres"), ("Prt", "Past")):
        if value == omorfi_tense:
            return [("Tense", ud_tense)]
    # Unknown values are dropped silently.
    return []
def test_json_serialization_error(self):
    """
    Verify that 500 errors serializing dictionaries don't return
    content-type application/json
    """
    @self.app.route('/')
    def handler():
        # A set cannot be serialized as JSON.
        return {'a': set()}

    try:
        self.assertStatus(500)
        self.assertHeader('Content-Type', 'text/html; charset=UTF-8')
    except ImportError:
        warn("Skipping JSON tests.")
def rewrite_Punct(word):
    """Decide between SYM and PUNCT for a word analysed as punctuation.

    Returns ``'SYM'`` for symbols, ``'PUNCT'`` for punctuation, and
    ``'SYM'`` (with a warning) for forms recognized as neither.
    """
    # NOTE: likely not a final mapping, see
    # https://github.com/TurkuNLP/UniversalFinnish/issues/1
    if is_symbol(word.form):
        # The two classes are expected to be disjoint.
        assert not is_punctuation(word.form), 'internal error'
        return 'SYM'

    if is_punctuation(word.form):
        assert not is_symbol(word.form), 'internal error'
        return 'PUNCT'

    warn(u'assigning SYM to unrecognized word ' + word.form)
    return 'SYM'
def rewrite_Pron(word):
    """Rewrite the POS of a word analysed as "Pron".

    Returns the string ``'PRON'``, or the tuple ``('ADJ', 'A')`` for
    lemmas listed in ``pro_adjective_lemmas``.
    """
    if word.lemma not in pro_adjective_lemmas:
        # NOTE: this is not a full mapping: some words tagged Pron should
        # map into DET instead. See
        # https://github.com/TurkuNLP/UniversalFinnish/issues/1,
        # https://github.com/TurkuNLP/UniversalFinnish/issues/27,
        # https://github.com/UniversalDependencies/docs/issues/97.
        # However, we're currently postponing this exception.
        return 'PRON'

    # Pro-adjectives such as "millainen" are assigned "A" based on lemma
    # (see https://github.com/TurkuNLP/UniversalFinnish/issues/67).
    warn('assigning %s to A/ADJ' % word.form)
    return ('ADJ', 'A')
def rewrite_pos(sentence):
    """Rewrite the POS tags of every word in ``sentence`` in place.

    For each word the function registered in ``rewrite_func`` under the
    word's current cpostag is called. If it returns a tuple, both
    ``cpostag`` and ``postag`` are assigned from it; otherwise only
    ``cpostag`` is assigned.

    Raises:
        AssertionError: if a KeyError occurs while looking up or running
            the rewrite function (reported as an unexpected cpostag).
    """
    for w in sentence.words():
        try:
            rewritten = rewrite_func[w.cpostag](w)
        except KeyError:
            warn(u'unexpected cpostag ' + w.cpostag)
            # Raised explicitly instead of via `assert False`: asserts
            # are stripped under -O, which would let the loop continue
            # with an unbound or stale `rewritten`.
            raise AssertionError('unexpected cpostag ' + w.cpostag)

        # if rewrite_func returns a tuple, assign both cpostag and
        # postag; otherwise assign only cpostag
        if isinstance(rewritten, tuple):
            w.cpostag, w.postag = rewritten
        else:
            w.cpostag = rewritten
def rewrite_CMP(word):
    """Map the Omorfi CMP (comparison) feature to the UD Degree feature.

    A warning is issued when the word is not an adjective, verb or
    adverb, but the mapping is still attempted. Returns
    ``[('Degree', value)]`` for Comp/Pos/Superl and an empty list for any
    other CMP value.
    """
    degree = word.feat_map()['CMP']

    if word.cpostag not in (ADJ_TAGS | VERB_TAGS | ADV_TAGS):
        warn(word.cpostag + ' with CMP')

    ud_degree = {'Comp': 'Cmp', 'Pos': 'Pos', 'Superl': 'Sup'}.get(degree)
    if ud_degree is None:
        # Unknown values are dropped silently.
        return []
    return [('Degree', ud_degree)]
def rewrite_CMP(word):
    """Map the Omorfi CMP (comparison) feature to the UD Degree feature.

    Words that are not adjectives, verbs or adverbs trigger a warning
    but are still mapped. Comp, Pos and Superl become Cmp, Pos and Sup;
    any other value yields an empty list.
    """
    value = word.feat_map()["CMP"]

    comparable_tags = ADJ_TAGS | VERB_TAGS | ADV_TAGS
    if word.cpostag not in comparable_tags:
        warn(word.cpostag + " with CMP")

    for omorfi_degree, ud_degree in (
        ("Comp", "Cmp"),
        ("Pos", "Pos"),
        ("Superl", "Sup"),
    ):
        if value == omorfi_degree:
            return [("Degree", ud_degree)]
    # Unknown values are dropped silently.
    return []
def add_SUBCAT_to_Pron(word):
    """Supply a SUBCAT feature for pronouns that lack one.

    The subcategory is looked up by lemma in ``Pron_SUBCAT_by_lemma``.
    Returns ``[('SUBCAT', value)]`` on success and an empty list when the
    word already has SUBCAT, is not tagged Pron, or has an unlisted
    lemma (the last case also issues a warning).
    """
    feats = word.feat_map()

    needs_subcat = 'SUBCAT' not in feats and word.postag == 'Pron'
    if not needs_subcat:
        return []

    try:
        subcat = Pron_SUBCAT_by_lemma[word.lemma]
    except KeyError:
        warn(u'failed to assign SUBCAT to Pron')
        return []
    else:
        return [('SUBCAT', subcat)]
def tearDown(self):
    """Stop the server subprocess and scan its output for problems.

    A server that is still running is sent SIGINT once, then SIGTERM
    once per second until it has exited. Afterwards every line of its
    stdout and stderr is inspected (case-insensitively): lines containing
    "warning" are reported through ``tools.warn``.

    Raises:
        AssertionError: for the first output line containing "error"
            (and no "warning").
    """
    if self.skip:
        return

    # poll() returns None while the process is alive.
    if self.p.poll() is None:
        os.kill(self.p.pid, signal.SIGINT)
        time.sleep(0.5)
    while self.p.poll() is None:
        os.kill(self.p.pid, signal.SIGTERM)
        time.sleep(1)

    for stream in (self.p.stdout, self.p.stderr):
        for line in stream:
            if tob('warning') in line.lower():
                tools.warn(line.strip().decode('utf8'))
            elif tob('error') in line.lower():
                raise AssertionError(line.strip().decode('utf8'))
def tearDown(self):
    """Stop the server subprocess and scan its output for problems.

    A running server is sent SIGINT up to ten times with pauses of
    ``0.1 * i`` seconds, then up to ten more times with pauses of ``i``
    seconds; each loop stops as soon as the process has exited.
    Afterwards every line of its stdout and stderr is inspected
    (case-insensitively): lines containing "warning" are reported
    through ``tools.warn``.

    Raises:
        AssertionError: for the first output line containing "error"
            (and no "warning").
    """
    if self.skip:
        return

    # poll() returns None while the process is alive.
    for i in range(10):
        if self.p.poll() is not None:
            break
        os.kill(self.p.pid, signal.SIGINT)
        time.sleep(0.1*i)
    # NOTE(review): the second round also sends SIGINT, only with longer
    # pauses -- confirm that no stronger signal was intended here.
    for i in range(10):
        if self.p.poll() is not None:
            break
        os.kill(self.p.pid, signal.SIGINT)
        time.sleep(i)

    for stream in (self.p.stdout, self.p.stderr):
        for line in stream:
            if tob('warning') in line.lower():
                tools.warn(line.strip().decode('utf8'))
            elif tob('error') in line.lower():
                raise AssertionError(line.strip().decode('utf8'))
def tearDown(self):
    """Stop the server subprocess and scan its output for problems.

    A running server is sent SIGINT up to ten times with pauses of
    ``0.1 * i`` seconds, then up to ten more times with pauses of ``i``
    seconds; each loop stops as soon as the process has exited.
    Afterwards every line of its stdout and stderr is inspected
    (case-insensitively): lines containing "warning" are reported
    through ``tools.warn``.

    Raises:
        AssertionError: for the first output line containing "error"
            (and no "warning").
    """
    if self.skip:
        return

    # poll() returns None while the process is alive.
    for i in range(10):
        if self.p.poll() is not None:
            break
        os.kill(self.p.pid, signal.SIGINT)
        time.sleep(0.1 * i)
    # NOTE(review): the second round also sends SIGINT, only with longer
    # pauses -- confirm that no stronger signal was intended here.
    for i in range(10):
        if self.p.poll() is not None:
            break
        os.kill(self.p.pid, signal.SIGINT)
        time.sleep(i)

    for stream in (self.p.stdout, self.p.stderr):
        for line in stream:
            if tob('warning') in line.lower():
                tools.warn(line.strip().decode('utf8'))
            elif tob('error') in line.lower():
                raise AssertionError(line.strip().decode('utf8'))
def add_Person(word):
    """Supply the Person feature for personal pronouns.

    Personal pronouns lack Person in Omorfi analyses, so it is looked up
    by lemma in ``person_by_lemma``. Returns ``[('Person', value)]`` on
    success and an empty list for words that are not personal pronouns
    or whose lemma is unlisted (the latter also issues a warning).
    """
    feats = word.feat_map()

    if word.cpostag != 'PRON':
        return []
    if feats.get('SUBCAT') != 'Pers':
        return []

    person = person_by_lemma.get(word.lemma, None)
    if person is None:
        warn(u'missing person for pronoun lemma ' + word.lemma)
        return []
    return [('Person', person)]
def rewrite_NUM(word):
    """Map the Omorfi NUM feature to the UD Number feature.

    Words that are not verbs, nouns, adjectives or pronouns trigger a
    warning but are still mapped. Sg becomes Sing and Pl becomes Plur;
    any other value yields an empty list.

    Raises:
        AssertionError: if the word also carries PRS, since both PRS and
            NUM would generate redundant Number features.
    """
    feats = word.feat_map()

    number_bearing_tags = VERB_TAGS | NOUN_TAGS | ADJ_TAGS | PRON_TAGS
    if word.cpostag not in number_bearing_tags:
        warn(word.cpostag + " with NUM")

    # Both PRS and NUM would generate redundant Number features
    assert "PRS" not in feats

    number = word.feat_map()["NUM"]
    ud_number = {"Sg": "Sing", "Pl": "Plur"}.get(number)
    if ud_number is None:
        # Unknown values are dropped silently.
        return []
    return [("Number", ud_number)]
def rewrite_NUM(word):
    """Map the Omorfi NUM feature to the UD Number feature.

    A warning is issued when the word is not a verb, noun, adjective or
    pronoun, but the mapping is still attempted. Returns
    ``[('Number', 'Sing')]`` for Sg, ``[('Number', 'Plur')]`` for Pl and
    an empty list otherwise.

    Raises:
        AssertionError: if the word also carries PRS, since both PRS and
            NUM would generate redundant Number features.
    """
    feats = word.feat_map()

    if word.cpostag not in (VERB_TAGS | NOUN_TAGS | ADJ_TAGS | PRON_TAGS):
        warn(word.cpostag + ' with NUM')

    # Both PRS and NUM would generate redundant Number features
    assert 'PRS' not in feats

    value = word.feat_map()['NUM']
    for omorfi_number, ud_number in (('Sg', 'Sing'), ('Pl', 'Plur')):
        if value == omorfi_number:
            return [('Number', ud_number)]
    # Unknown values are dropped silently.
    return []
def rewrite_CASE(word):
    """Map the Omorfi CASE feature to the UD Case feature.

    Returns ``[('Case', value)]`` for the cases that are carried over
    unchanged. The values Dis and Lat are deliberately not generated (a
    warning is issued) and unknown values are dropped silently; both
    yield an empty list. A warning is also issued when CASE appears on a
    word that is not expected to take case.
    """
    feats = word.feat_map()
    case = feats['CASE']

    # any nouns, pronouns, adjectives and numbers can take case, as
    # can non-finite verbs (infinitives and participles), others
    # can't.
    if word.cpostag not in (NOUN_TAGS | PRON_TAGS | ADJ_TAGS | NUM_TAGS):
        if word.cpostag in VERB_TAGS:
            if not ('INF' in feats or 'PCP' in feats):
                warn('non-INF/PCP ' + word.cpostag + ' with CASE')
        else:
            warn(word.cpostag + ' with CASE')

    # Cases whose UD value is identical to the Omorfi value.
    carried_over = (
        'Abe',  # abessive
        'Abl',  # ablative
        'Acc',  # accusative
        'Ade',  # adessive
        'All',  # allative
        'Com',  # comitative
        'Ela',  # elative
        'Ess',  # essive
        'Gen',  # genitive
        'Ill',  # illative
        'Ine',  # inessive
        'Ins',  # instructive
        'Nom',  # nominative
        'Par',  # partitive
        'Tra',  # translative
    )
    for name in carried_over:
        if case == name:
            return [('Case', name)]

    if case == 'Dis':  # distributive
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/55
        warn('not generating Case Dis')
    elif case == 'Lat':  # lative
        # see https://code.google.com/p/omorfi/wiki/AnalysisPossibilities,
        # http://scripta.kotus.fi/visk/sisallys.php?p=120 Huom 1,
        # https://github.com/TurkuNLP/UniversalFinnish/issues/44
        warn('not generating Case Lat')
    return []
def rewrite_CASE(word):
    """Map the Omorfi CASE feature to the UD Case feature.

    Fifteen case values are carried over unchanged as
    ``[("Case", value)]``. Dis and Lat are deliberately not generated
    (with a warning), and unknown values are dropped silently; both
    yield an empty list. CASE on a word that is not expected to take
    case triggers a warning but does not stop the mapping.
    """
    fmap = word.feat_map()
    value = fmap["CASE"]

    # any nouns, pronouns, adjectives and numbers can take case, as
    # can non-finite verbs (infinitives and participles), others
    # can't.
    case_bearing_tags = NOUN_TAGS | PRON_TAGS | ADJ_TAGS | NUM_TAGS
    if word.cpostag not in case_bearing_tags:
        if word.cpostag not in VERB_TAGS:
            warn(word.cpostag + " with CASE")
        elif "INF" not in fmap and "PCP" not in fmap:
            warn("non-INF/PCP " + word.cpostag + " with CASE")

    # Omorfi case -> UD case; the two inventories use the same labels.
    ud_case = {
        "Abe": "Abe",  # abessive
        "Abl": "Abl",  # ablative
        "Acc": "Acc",  # accusative
        "Ade": "Ade",  # adessive
        "All": "All",  # allative
        "Com": "Com",  # comitative
        "Ela": "Ela",  # elative
        "Ess": "Ess",  # essive
        "Gen": "Gen",  # genitive
        "Ill": "Ill",  # illative
        "Ine": "Ine",  # inessive
        "Ins": "Ins",  # instructive
        "Nom": "Nom",  # nominative
        "Par": "Par",  # partitive
        "Tra": "Tra",  # translative
    }
    if value in ud_case:
        return [("Case", ud_case[value])]

    # Cases that are intentionally left out of the output.
    not_generated = {
        # distributive,
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/55
        "Dis": "not generating Case Dis",
        # lative,
        # see https://code.google.com/p/omorfi/wiki/AnalysisPossibilities,
        # http://scripta.kotus.fi/visk/sisallys.php?p=120 Huom 1,
        # https://github.com/TurkuNLP/UniversalFinnish/issues/44
        "Lat": "not generating Case Lat",
    }
    if value in not_generated:
        warn(not_generated[value])
    return []
def remove_Inf5(word):
    """Remove the feature INF=Inf5 from ``word`` if it is present.

    Omorfi generates Inf5 *very* rarely (once in TDT) and
    inconsistently, and the "maisillaan" form termed the "5th
    infinitive" is not considered as such by ISK
    (http://scripta.kotus.fi/visk/sisallys.php?p=120).

    A warning is issued when the feature is found on a non-verb; it is
    removed regardless. Returns None.
    """
    feats = word.feat_map()
    if feats.get('INF') != 'Inf5':
        # Either no INF feature at all, or some other infinitive.
        return

    if word.cpostag not in VERB_TAGS:
        warn('unexpected CPOSTAG with INF=Inf5: ' + word.cpostag)
    word.remove_feat('INF', 'Inf5')
def rewrite_ADJ_SUBCAT(word):
    """Map the Omorfi SUBCAT value of an adjective to UD features.

    Returns ``[('NumType', value)]`` for Card and Ord. Interr, Rel and
    Pfx are intentionally left unmapped (with a warning) and unknown
    values are dropped silently; all of those yield an empty list.
    """
    subcat = word.feat_map()['SUBCAT']

    # NOTE: UD NumType applies also to adjectives, see
    # http://universaldependencies.github.io/docs/u/feat/NumType.html
    for num_type in ('Card', 'Ord'):
        if subcat == num_type:
            return [('NumType', num_type)]

    if subcat in ('Interr', 'Rel'):
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/61
        warn('not mapping ADJ SUBCAT ' + subcat)
    elif subcat == 'Pfx':
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/60
        warn('not mapping ADJ SUBCAT Pfx')
    return []
def rewrite_ADJ_SUBCAT(word):
    """Map the Omorfi SUBCAT value of an adjective to UD features.

    Card and Ord become the UD NumType feature. Interr, Rel and Pfx are
    intentionally left unmapped (with a warning), and unknown values are
    dropped silently; all of those yield an empty list.
    """
    value = word.feat_map()["SUBCAT"]

    # NOTE: UD NumType applies also to adjectives, see
    # http://universaldependencies.github.io/docs/u/feat/NumType.html
    num_type = {"Card": "Card", "Ord": "Ord"}.get(value)
    if num_type is not None:
        return [("NumType", num_type)]

    if value == "Pfx":
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/60
        warn("not mapping ADJ SUBCAT Pfx")
    elif value == "Interr" or value == "Rel":
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/61
        warn("not mapping ADJ SUBCAT " + value)
    return []
def tearDown(self):
    """Stop the server subprocess and scan its output for problems.

    A server that is still running is sent SIGINT, then SIGTERM, and
    finally SIGKILL once per second (with a warning each time) until it
    has exited. The complete stdout and stderr output is then collected
    and inspected line by line (case-insensitively): lines containing
    "warning" are reported through ``tools.warn``.

    Raises:
        AssertionError: for the first output line containing "error"
            (and no "warning").
    """
    if self.skip:
        return

    # poll() returns None while the process is alive.
    if self.p.poll() is None:
        os.kill(self.p.pid, signal.SIGINT)
        time.sleep(0.5)
    if self.p.poll() is None:
        os.kill(self.p.pid, signal.SIGTERM)
        time.sleep(0.5)
    while self.p.poll() is None:
        tools.warn("Trying to kill server %r with pid %d." % (self.server, self.p.pid))
        os.kill(self.p.pid, signal.SIGKILL)
        time.sleep(1)

    # Drain both streams completely before inspecting any line.
    lines = [line for stream in (self.p.stdout, self.p.stderr) for line in stream]
    for line in lines:
        if tob('warning') in line.lower():
            tools.warn(line.strip().decode('utf8'))
        elif tob('error') in line.lower():
            raise AssertionError(line.strip().decode('utf8'))
def remove_Adv_CASE(word):
    """Remove the case feature from adverbs.

    Omorfi is only expected to assign the CASE feature value Dis
    (distributive) to adverbs, and only inconsistently. Distributive is
    not recognized as a Finnish case by ISK
    (http://scripta.kotus.fi/visk/sisallys.php?p=81), so it is removed
    altogether, resulting in a consistent treatment where no adverb has
    case. https://github.com/TurkuNLP/UniversalFinnish/issues/17

    Any other CASE value on an adverb is left in place and reported with
    a warning. Returns None.
    """
    if word.cpostag != 'ADV':
        return

    feats = word.feat_map()
    if 'CASE' not in feats:
        return

    case = feats['CASE']
    if case != 'Dis':
        warn('unexpected CASE value for ADV: ' + case)
        return
    word.remove_feat('CASE', 'Dis')
def rewrite_NOUN_SUBCAT(word):
    """Map the Omorfi SUBCAT value of a noun to UD features.

    Returns ``[('Abbr', 'Yes')]`` for Acro and Abbr, and an empty list
    for every other value.

    Raises:
        AssertionError: if SUBCAT is Prop but the word's cpostag is not
            PROPN.
    """
    subcat = word.feat_map()['SUBCAT']

    # In the initial CoNLL-U conversion implementation, the only
    # common noun SUBCAT value was Prop (proper noun), which has
    # already been mapped in rewrite-pos.py. Since then, we've
    # encountered also Acro and Abbr. Just sanity-check the former but
    # add the latter two using a mapping matching that for ACRO=Yes
    # and ABBR=Yes.
    if subcat in ('Acro', 'Abbr'):
        return [('Abbr', 'Yes')]

    if subcat == 'Prop':
        assert word.cpostag == 'PROPN', 'internal error'
    elif subcat == 'Pfx':
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/60
        warn('not mapping NOUN SUBCAT Pfx')
    return []
def rewrite_NOUN_SUBCAT(word):
    """Map the Omorfi SUBCAT value of a noun to UD features.

    Only Acro and Abbr produce output, namely ``[("Abbr", "Yes")]``.
    Prop is sanity-checked, Pfx is reported with a warning, and unknown
    values are dropped silently; all of those yield an empty list.

    Raises:
        AssertionError: if SUBCAT is Prop but the word's cpostag is not
            PROPN.
    """
    value = word.feat_map()["SUBCAT"]
    features = []

    # In the initial CoNLL-U conversion implementation, the only
    # common noun SUBCAT value was Prop (proper noun), which has
    # already been mapped in rewrite-pos.py. Since then, we've
    # encountered also Acro and Abbr. Just sanity-check the former but
    # add the latter two using a mapping matching that for ACRO=Yes
    # and ABBR=Yes.
    if value == "Prop":
        assert word.cpostag == "PROPN", "internal error"
    elif value == "Pfx":
        # see https://github.com/TurkuNLP/UniversalFinnish/issues/60
        warn("not mapping NOUN SUBCAT Pfx")
    elif value == "Acro" or value == "Abbr":
        features.append(("Abbr", "Yes"))

    return features
def rewrite_Num(word):
    """Rewrite the POS of a word analysed as "Num".

    Words with SUBCAT Card become ``'NUM'`` and words with SUBCAT Ord
    become ``'ADJ'``. Without a usable SUBCAT the decision falls back to
    ``numtype(word.form)``; forms that it cannot classify default to
    ``'NUM'`` with a warning.
    """
    subcat = word.feat_map().get('SUBCAT')
    by_subcat = {'Card': 'NUM', 'Ord': 'ADJ'}
    if subcat in by_subcat:
        return by_subcat[subcat]

    # surface form-based heuristics
    kind = numtype(word.form)
    if kind == CARDINAL:
        return 'NUM'
    if kind == ORDINAL:
        # not quite sure about this, gives e.g. 1./ADJ
        warn('assigning ADJ to "ordinal": ' + word.form)
        return 'ADJ'
    if kind is None:
        warn(word.cpostag + u' without SUBCAT Card or Ord:' + word.form)
        # default to NUM (TODO: avoid guessing)
        return 'NUM'

    assert False, 'internal error'
def remove_Inf1_CASE_Lat(word):
    """Remove the feature CASE=Lat (lative) from ``word`` if it is present.

    Omorfi follows a dated analysis where the base form of the
    A-infinitive (Infinitive 1) is termed lative. Lative is not
    recognized by ISK (http://scripta.kotus.fi/visk/sisallys.php?p=81,
    see also http://scripta.kotus.fi/visk/sisallys.php?p=120 Huom 1),
    so this case is removed. Note that no information is removed, as
    the Lat value for case fully coincides with Inf1 and no other case
    in Omorfi.
    https://github.com/TurkuNLP/UniversalFinnish/issues/44

    A warning is issued when the feature is found on a non-verb; it is
    removed regardless. Returns None.
    """
    feats = word.feat_map()
    if feats.get('CASE') != 'Lat':
        # Either no CASE feature at all, or some other case.
        return

    if word.cpostag not in VERB_TAGS:
        warn('unexpected CPOSTAG with CASE=Lat: ' + word.cpostag)
    word.remove_feat('CASE', 'Lat')
def rewrite_PRS(word):
    """Map the Omorfi PRS feature to UD Person and Number features.

    Non-verbs trigger a warning but are still mapped. Returns a list of
    two pairs, ``("Person", ...)`` followed by ``("Number", ...)``, for
    the six person/number values, and an empty list (with a warning)
    for anything else.

    Raises:
        AssertionError: if the word also carries NUM, since both PRS and
            NUM would generate redundant Number features.
    """
    if word.cpostag not in VERB_TAGS:
        warn(word.cpostag + " with PRS")

    # Both PRS and NUM would generate redundant Number features
    assert "NUM" not in word.feat_map()

    value = word.feat_map()["PRS"]

    # PRS value -> (person, number)
    person_number = {
        "Sg1": ("1", "Sing"),
        "Sg2": ("2", "Sing"),
        "Sg3": ("3", "Sing"),
        "Pl1": ("1", "Plur"),
        "Pl2": ("2", "Plur"),
        "Pl3": ("3", "Plur"),
    }
    if value not in person_number:
        warn("unmapped PRS " + word.feat_map()["PRS"])
        return []

    person, number = person_number[value]
    return [("Person", person), ("Number", number)]
def rewrite_PRS(word):
    """Map the Omorfi PRS feature to UD Person and Number features.

    A warning is issued when the word is not a verb, but the mapping is
    still attempted. The values Sg1-Sg3 and Pl1-Pl3 yield
    ``[('Person', p), ('Number', n)]``; any other value yields an empty
    list and a warning.

    Raises:
        AssertionError: if the word also carries NUM, since both PRS and
            NUM would generate redundant Number features.
    """
    if word.cpostag not in VERB_TAGS:
        warn(word.cpostag + ' with PRS')

    # Both PRS and NUM would generate redundant Number features
    assert 'NUM' not in word.feat_map()

    value = word.feat_map()['PRS']

    # Build the six combinations of number prefix and person digit.
    mapping = {}
    for prefix, number in (('Sg', 'Sing'), ('Pl', 'Plur')):
        for person in ('1', '2', '3'):
            mapping[prefix + person] = [('Person', person), ('Number', number)]

    features = mapping.get(value)
    if features is None:
        warn('unmapped PRS '+word.feat_map()['PRS'])
        return []
    return features
def tearDown(self):
    """Stop the server subprocess and scan its output for problems.

    A server that is still running is sent SIGINT, then SIGTERM, and
    finally SIGKILL once per second (with a warning each time) until it
    has exited. The complete stdout and stderr output is then collected
    and inspected line by line (case-insensitively): lines containing
    "warning" are reported through ``tools.warn``.

    Raises:
        AssertionError: for the first output line containing "error"
            (and no "warning").
    """
    if self.skip:
        return

    # poll() returns None while the process is alive.
    if self.p.poll() is None:
        os.kill(self.p.pid, signal.SIGINT)
        time.sleep(0.5)
    if self.p.poll() is None:
        os.kill(self.p.pid, signal.SIGTERM)
        time.sleep(0.5)
    while self.p.poll() is None:
        tools.warn("Trying to kill server %r with pid %d." %
                   (self.server, self.p.pid))
        os.kill(self.p.pid, signal.SIGKILL)
        time.sleep(1)

    # Drain both streams completely before inspecting any line.
    lines = [
        line for stream in (self.p.stdout, self.p.stderr) for line in stream
    ]
    for line in lines:
        if tob('warning') in line.lower():
            tools.warn(line.strip().decode('utf8'))
        elif tob('error') in line.lower():
            raise AssertionError(line.strip().decode('utf8'))
t = MakoTemplate(name='mako_inherit', lookup=['./views/']).render(var='v') self.assertEqual('o\ncvc\no\n', t) t = MakoTemplate('<%inherit file="mako_base.tpl"/>\nc${var}c\n', lookup=['./views/']).render(var='v') self.assertEqual('o\ncvc\no\n', t) t = MakoTemplate('<%inherit file="views/mako_base.tpl"/>\nc${var}c\n', lookup=['./']).render(var='v') self.assertEqual('o\ncvc\no\n', t) def test_template_shortcut(self): result = mako_template('start ${var} end', var='middle') self.assertEqual(touni('start middle end'), result) def test_view_decorator(self): @mako_view('start ${var} end') def test(): return dict(var='middle') self.assertEqual(touni('start middle end'), test()) try: import mako except ImportError: warn("WARNING: No Mako template support. Skipping tests.") del TestMakoTemplate if __name__ == '__main__': #pragma: no cover unittest.main()
def test_inherit(self):
    """ Templates: Mako lookup and inheritance """
    # A template loaded by name from the lookup directory.
    t = MakoTemplate(name='mako_inherit', lookup=['./views/']).render(var='v')
    self.assertEqual('o\ncvc\no\n', t)
    # Inline source that inherits from a base template found via lookup.
    t = MakoTemplate('<%inherit file="mako_base.tpl"/>\nc${var}c\n', lookup=['./views/']).render(var='v')
    self.assertEqual('o\ncvc\no\n', t)
    # Same inline source, with the base template addressed through a
    # path relative to a lookup directory one level up.
    t = MakoTemplate('<%inherit file="views/mako_base.tpl"/>\nc${var}c\n', lookup=['./']).render(var='v')
    self.assertEqual('o\ncvc\no\n', t)

def test_template_shortcut(self):
    # mako_template() renders a source string directly.
    result = mako_template('start ${var} end', var='middle')
    self.assertEqual(touni('start middle end'), result)

def test_view_decorator(self):
    # mako_view() renders the template with the dict returned by the
    # decorated function.
    @mako_view('start ${var} end')
    def test():
        return dict(var='middle')
    self.assertEqual(touni('start middle end'), test())

# Probe for Mako; without it the test case class is removed so that its
# tests are not run.
try:
    import mako
except ImportError:
    warn("No Mako template support. Skipping tests.")
    del TestMakoTemplate

if __name__ == '__main__': #pragma: no cover
    unittest.main()
def rewrite_DRV(word):
    """Map the Omorfi DRV (derivation) feature to a Derivation feature.

    Returns ``[('Derivation', value)]`` for the known derivational
    suffixes. A warning is issued when the suffix appears on a word whose
    cpostag is not among the tags expected for it, but the feature is
    still produced. Unknown DRV values yield an empty list and a
    warning.
    """
    drv = word.feat_map()['DRV']

    # DRV value -> (tags the derivation is expected on, Derivation value)
    expectations = {
        # "-minen" produces noun (e.g. "valmistaminen")
        # http://scripta.kotus.fi/visk/sisallys.php?p=221,
        # https://github.com/TurkuNLP/UniversalFinnish/issues/21
        'Der_minen': (NOUN_TAGS, 'Minen'),
        # "-sti" produces adverb (e.g. "pysyvästi")
        # http://scripta.kotus.fi/visk/sisallys.php?p=371
        'Der_sti': (ADV_TAGS, 'Sti'),
        # "-inen" produces adjective or noun (e.g. "omenainen")
        # http://scripta.kotus.fi/visk/sisallys.php?p=261, for Omorfi
        # apparently only adjectives.
        'Der_inen': (ADJ_TAGS, 'Inen'),
        # "-lainen" produces noun (e.g. "Turkulainen"); adjectives are
        # accepted as well
        # http://scripta.kotus.fi/visk/sisallys.php?p=190
        'Der_lainen': (ADJ_TAGS | NOUN_TAGS, 'Lainen'),
        # "-ja" produces noun (e.g. "oppija")
        # http://scripta.kotus.fi/visk/sisallys.php?p=252
        'Der_ja': (NOUN_TAGS, 'Ja'),
        # "-ton" produces adjective (e.g. "voimaton")
        # http://scripta.kotus.fi/visk/sisallys.php?p=292
        'Der_ton': (ADJ_TAGS, 'Ton'),
        # "-vs" produces noun (e.g. "toimivuus")
        'Der_vs': (NOUN_TAGS, 'Vs'),
        # "-llinen" produces adjective (e.g. "vaunullinen")
        # http://scripta.kotus.fi/visk/sisallys.php?p=276
        'Der_llinen': (ADJ_TAGS, 'Llinen'),
        # "-ttain" produces adverb (e.g. "lajeittain")
        'Der_ttain': (ADV_TAGS, 'Ttain'),
        # "-u" produces noun (e.g. "lopettelu")
        # http://scripta.kotus.fi/visk/sisallys.php?p=221,
        # https://github.com/TurkuNLP/UniversalFinnish/issues/21
        'Der_u': (NOUN_TAGS, 'U'),
        # "-ttaa" produces verb (e.g. "vaivaannuttaa")
        'Der_ttaa': (VERB_TAGS, 'Ttaa'),
        # "-tar" produces noun (e.g. "suojelijatar")
        'Der_tar': (NOUN_TAGS, 'Tar'),
    }

    if drv not in expectations:
        warn('unknown DRV value ' + drv)
        return []

    expected_tags, derivation = expectations[drv]
    if word.cpostag not in expected_tags:
        warn(word.cpostag + ' with ' + drv)
    return [('Derivation', derivation)]