clause = list_checker(("S", "SBAR"))

# Simple or subordinated clause: reuses the clause checker above.
simple_or_sub_phrase = clause

# Noun phrase.
noun_phrase = equality_checker("NP")

# Verb phrase.
verb_phrase = equality_checker("VP")

# Adverbial phrase: bound to the fail() placeholder.
adverbial_phrase = fail()

# Direct complement.
complement_direct = list_checker(("CD",))

# Particle constituent: bound to the fail() placeholder.
particle_constituent = fail()

# Past-participle verb constituent.
past_participle_verb = equality_checker("VBN")

# Interjection constituent: bound to the fail() placeholder.
interjection = fail()

# NER annotated into the semantic tree: bound to the fail() placeholder.
ner_constituent = fail()
# coding=utf-8
from corefgraph.resources.lambdas import (
    equality_checker,
    list_checker,
    matcher,
    fail,
)

__author__ = 'Valeria Quochi <*****@*****.**>'
__date__ = '5/16/2013'

# Root constituent labels.
root = list_checker(("root", "top", "ROOT", "TOP"))

# Clause labels.
# Original note: clause introduced by a (possibly empty) subordinating
# conjunction.
clause = list_checker(("S", "SBAR"))

# Noun phrase.
noun_phrase = equality_checker("NP")

# Verb phrase.
verb_phrase = equality_checker("VP")

# Checkers bound to the fail() placeholder.
particle_constituents = fail()
interjections = fail()
ner_constituent = fail()

past_participle_verb = equality_checker("VBN")

# Same labels as `clause`, but built as an independent checker.
simple_or_sub_phrase = list_checker(("S", "SBAR"))

mention_constituents = matcher("NP.*")

# NOTE(review): these labels differ from the "NP"/"VP" labels used above;
# confirm they are the intended head-rule constituents for this tag set.
head_rules = list_checker(("SN", "SUJ", "GRUP.NOM"))
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, matcher, fail

__author__ = ''

# Features questions
female = matcher(".*FEM.*")
# The pattern previously ended in "MASC*", where "*" quantified only the
# letter "C" (so "MAS" alone was enough). It now mirrors the other feature
# patterns: any tag containing "MASC".
male = matcher(".*MASC.*")
neutral = fail()
singular = matcher(".*SING.*")
plural = matcher(".*PLUR.*")
animate = fail()
inanimate = fail()

# Adjectives
adjective = matcher("^ADJ.*")

# Pronouns
pronoun = matcher("^PRON.*")
personal_pronoun = matcher("^PRON.*PRS.*")
relative_pronoun = matcher("^PRON.*REL.*")
interrogative_pronoun = matcher("^PRON.*INT.*")
# A pronoun can be a mention when it is relative or personal.
mention_pronoun = lambda x: relative_pronoun(x) or personal_pronoun(x)

# Nouns
singular_common_noun = matcher("^NOUN.*SING.*")
plural_common_noun = matcher("^NOUN.*PLUR.*")
__author__ = 'Josu Bermudez <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

# Label that marks "no named entity".
_no_ner = "O"

# True when the label carries no entity: the "O" label, None or empty string.
no_ner = lambda x: x == _no_ner or x is None or x == ""
# True for any real entity label. The name shadows the `all` builtin inside
# this module; it is kept because it is part of the module's public interface.
all = lambda x: x != _no_ner and x is not None and x != ""

# Classic 3 types useful in some cases
person = list_checker(("person", "per"))
organization = list_checker(("org", "organization"))
location = list_checker(("location", "loc"))
other = list_checker(("misc", "other"))

# Feature checkers are currently bound to the fail() placeholder; the
# previously used definitions are preserved as comments.
singular = fail()  # lambda x: all(x) and not organization(x)
plural = fail()  # organization

animate = fail()  # list_checker(("PERSON", "PER"))
inanimate = fail()
# list_checker(("FACILITY", "FAC", "NORP", "LOCATION", "LOC", "PRODUCT", "EVENT", "ORGANIZATION", "ORG",
#               "WORK OF ART", "LAW", "LANGUAGE", "DATE", "TIME", "PERCENT", "MONEY", "NUMBER", "QUANTITY",
#               "ORDINAL", "CARDINAL", "MISC", "GPE", "WEA", "NML"))

# NE types that denotes mention
# The earlier catch-all definition
#   mention_ner = lambda x: x != _no_ner and x is not None and x != ""
# was immediately overwritten by the list below, so it was dead code and has
# been removed; the effective checker is unchanged.
mention_ner = list_checker(
    ("person", "norp", "facility", "organization", "gpe", "nml", "location",
     "product", "event", "work of art", "law", "language", "date", "time"))

# NE types that must be filtered from mention candidates
# Indefinite forms.
# 'mateixes' and 'mateix' previously carried leading/trailing spaces, so a
# membership test against a bare token could never succeed; the padding is
# removed. Exact duplicate entries were dropped (membership is unaffected).
indefinite = list_checker(
    ('quelcom', 'algú', 'alguna', 'algunes', 'algun', 'alguns', 'ambdues',
     'ambdós', 'bastant', 'bastants', 'qualssevol', 'qualsevol', 'altres',
     'massa', 'mitja', 'mitjans', 'mateixa', 'mateixes', 'mateix',
     'mateixos', 'molta', 'moltes', 'molt', 'molts', 'res', 'ningú', 'cap',
     'gens', 'ninguns', 'altre', 'poca', 'poques', 'poc', 'pocs', 'tantes',
     'tanta', 'tants', 'tant', 'totes', 'tota', 'tots', 'tot', 'unes', 'una',
     'uns', 'un', 'diverses', 'diversos'))

# from Freeling dict PR.*
relative = list_checker(
    ('on', 'com', 'qui', 'quins', 'quan', 'quanta', 'quantes', 'quants',
     'que'))

# NOTE(review): this pattern matches "<word> mismo/misma(s)"; confirm it is
# the intended reflexive marker for the word lists in this module.
reflexive = matcher(r'^[^\s]* mism(o|a)s?$')

no_organization = fail()

first_person = list_checker(
    ("em", "meva", "meves", "me", "meu", "meus", "meues", "ens", "nosaltres",
     "nostra", "nostres", "nostre", "jo"))

second_person = list_checker(
    ("us", "et", "tu", "teves", "teva", "teus", "teu", "teues", "vostès",
     "vostè", "vosaltres", "vós", "vostres", "vostra", "vostre"))

third_person = list_checker(
    ("ell", "ella", "elles", "ells", "la", "les", "li", "els", "el", "seves",
     "seva", "seus", "seu", "seues"))

pleonastic = list_checker(("això", "_"))
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, fail, matcher

__author__ = 'Valeria Quochi <*****@*****.**>'
__date__ = '5/16/2013'

# features questions
female = fail()
male = fail()
neutral = fail()
# These patterns contain regex syntax ("^", ".*") but were passed to
# equality_checker, which would compare them literally against the tag. They
# are now compiled with matcher, like the other patterns in this module.
# NOTE(review): the other noun patterns use "~" ("NOU~PR") while these use
# "_" ("NOU_CS"/"NOU_CP"); confirm the separator against the tag set.
singular = matcher("^NOU_CS")
plural = matcher("^NOU_CP")
animate = fail()
inanimate = fail()

# Adjectives
adjective = matcher("^ADJ.*")

# pronouns
personal_pronoun = matcher("^PRO~PE")
relative_pronoun = matcher("^PRO~RE")
pronoun = matcher("^PRO")
# A pronoun can be a mention when it is relative or personal.
mention_pronoun = lambda x: relative_pronoun(x) or personal_pronoun(x)

# Nouns (same regex-vs-equality fix as singular/plural above).
singular_common_noun = matcher("^NOU_CS")
plural_common_noun = matcher("^NOU_CP")
proper_noun = matcher("^NOU~PR")
noun = matcher("^NOU.*")

# Verbs
verbs = matcher("^V.*")
# Was equality_checker("^VMO.*"): a regex pattern compared literally.
modals = matcher("^VMO.*")
root = list_checker(("root", "top", "ROOT", "TOP"))

# Clause labels.
clause = list_checker(("S", "SENTENCE"))

# Noun-phrase labels.
noun_phrase = list_checker(("SN", "GRUP.NOM", "SUJ"))

# Verb-phrase labels.
verb_phrase = list_checker(("GRUP.VERB",))

# Direct-complement labels.
complement_direct = list_checker(("CD",))

# Particle constituent: bound to the fail() placeholder.
particle_constituent = fail()

# Past-participle verb constituent: bound to the fail() placeholder.
past_participle_verb = fail()

# Interjection constituent.
interjection = equality_checker("INTERJECCIÓ")

# NER annotated into the semantic tree: bound to the fail() placeholder.
ner_constituent = fail()

# Simple or subordinated clause: reuses the clause checker.
simple_or_sub_phrase = clause


# TODO Remove this check
def mention_constituents(x):
    """Return a truthy value when x is a noun phrase or a direct complement."""
    return noun_phrase(x) or complement_direct(x)
# coding=utf-8
""" Named entity labels used in Semeval 2010.
"""
from corefgraph.resources.lambdas import list_checker, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

# True for any real entity label: everything except "O", None and "".
# The name shadows the `all` builtin inside this module; it is kept because
# it is part of the module's public interface.
all = lambda x: not ((x == "O") or (x is None) or (x == ""))

# Classic 3 types useful in some cases
person = list_checker(("PERSON", "PER", "person"))
organization = list_checker(("ORG", "ORGANIZATION", "org"))
location = list_checker(("LOCATION", "LOC", "loc"))
#other = list_checker(("MISC", "OTHER", "other"))

# Any entity that is not an organization counts as singular.
singular = lambda x: all(x) and not organization(x)
plural = fail()

animate = person
inanimate = location

# NE types that denotes mention
mention_ner = all

# NE types that must be filtered from mention candidates
# Previously `lambda: False`, which accepted no argument and therefore raised
# TypeError when called with a label like every other checker in this module.
# The argument is optional so existing zero-argument calls keep working.
no_mention_ner = lambda x=None: False
# very basic Stopword list.
stop_words = list_checker(
    ("a", "ad", "ai", "al", "alla", "allo", "con", "cosi'", "così", "da",
     "del", "della", "dello", "dentro", "di", "e", "ecco", "ed", "fra",
     "fuori", "ha", "hai", "hanno", "ho", "il", "in", "nei", "nella", "o",
     "per", "qua'", "quello", "questo", "qui", "quindi", "quà", "sopra",
     "sotto", "su", "sul", "sulla", "tra", "un", "una", "uno"))

# The original tuple listed "ad" twice; the duplicate is dropped (membership
# is unchanged).
extended_stop_words = list_checker(("ad",))

non_words = list_checker(
    ("mm", "hmm", "ahm", "uhm", "ehm", "ah", "eh", "oh", "uh", "ih"))

unreliable = fail()

invalid_stop_words = list_checker(
    ("c'è", "c'e'", "spa", "s.p.a.", "s.r.l.", "ecc", "etc"))

# TODO. Re-Check. not sure of what should go here
invalid_start_words = list_checker(("'s", "etc"))

invalid_end_words = list_checker(("etc",))

# The four compass points were listed twice in the original tuple; the
# duplicates are dropped (membership is unchanged).
location_modifiers = list_checker(
    ("est", "ovest", "nord", "sud", "superiore", "inferiore"))

common_NE_subfixes = fail()  # TODO

speaking_begin = list_checker(("``",))
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

# --- Feature questions -----------------------------------------------------
# Each pattern tests a fixed character position of the tag; the position
# depends on the tag's leading letter.

# Gender: 4th character for A/D/P(/S) tags, 3rd for N tags, 7th for V tags.
female = matcher(r"^[ADP]..F|^N.F|^V.....F")
male = matcher(r"^[ADPS]..M|^N.M|^V.....M")
neutral = matcher(r"^[ADP]..N")

# Number: 5th character for A/D/P/S tags, 4th for N tags, 6th for V tags.
singular = matcher(r"^[ADPS]...S|^N..S|^V....S")
plural = matcher(r"^[ADPS]...P|^N..P|^V....P")

# Animacy checkers are bound to the fail() placeholder.
animate = fail()
inanimate = fail()

# --- Adjectives ------------------------------------------------------------
adjective = matcher(r"^A")

# --- Pronouns --------------------------------------------------------------
# Tags starting with "P", optionally preceded by "D".
pronoun = matcher(r"^D?P")
personal_pronoun = matcher(r"^PP")
relative_pronoun = matcher(r"^PR")
interrogative_pronoun = matcher(r"^PT")
# "P" followed by one of P, X, R, L, or a tag starting with "DP".
mention_pronoun = matcher(r"P[PXRL]|^DP")

# --- Nouns -----------------------------------------------------------------
singular_common_noun = matcher(r"^NC.S")
plural_common_noun = matcher(r"^NC.P")
adjective_qualif = "ADJ~QU" adjective_ord = "ADJ~OR" adjective_indef = "ADJ~IN" adjective_dem = "ADJ~DE" adjective_poss = "ADJ~PO" adjective_deitt = "ADJ~DI" adjective_interr = "ADJ~IR" adjective_excl = "ADJ~EX" """ _conjunctions = equality_checker("CONJ", ) # Usable functions # features questions female = fail() male = fail() neutral = fail() singular = fail() plural = fail() animate = fail() inanimate = fail() # Adjectives adjective = list_checker( (_adjective, _adjective_comparative, _adjective_superlative)) # pronouns personal_pronoun = list_checker((_personal_pronoun, _possessive_pronoun)) relative_pronoun = list_checker((_wh_pronoun, _wh_possessive_pronoun)) pronoun = list_checker((_personal_pronoun, _possessive_pronoun, _wh_pronoun,
_pronouns = matcher(r"pos=p") _possessive = matcher(r".*postype=possessive") _relative = matcher(r".*postype=relative") _personal = matcher(r".*postype=personal") _personal_pronouns = lambda x: _pronouns(x) and _personal(x) # features questions male = matcher(r".*gen=m") female = matcher(r".*gen=f") neutral = matcher(r".*gen=n") singular = matcher(r".*num=s") plural = matcher(r".*num=p") animate = fail() inanimate = fail() # Adecjtives adjectives = matcher(r"pos=a") # Determinant _possessive_determinant = matcher(".*postype=possessive") #pronouns #TODO Assure broad usage of these tags pronouns = lambda x: _pronouns(x) or _possessive_determinant(x) possessive_pronouns = lambda x: _pronouns(x) and _possessive(x) relative_pronoun = lambda x: _pronouns(x) and _relative(x) mention_pronouns = lambda x: _personal_pronouns(x) or possessive_pronouns(x) or _possessive_determinant(x) \