コード例 #1
0
# Constituent-label predicates.  Each name is a callable built by the project
# helpers (list_checker / equality_checker / fail) and is applied to a
# constituent label.

# Labels accepted as a clause.
clause = list_checker(("S", "SBAR"))

# A simple or subordinated clause is recognised with the clause predicate.
simple_or_sub_phrase = clause

# Noun phrase label.
noun_phrase = equality_checker("NP")

# Verb phrase label.
verb_phrase = equality_checker("VP")

# Adverbial phrase: no label configured, uses the fail() placeholder.
adverbial_phrase = fail()

# Direct complement label.
complement_direct = list_checker(("CD",))

# Particle constituent: no label configured, uses the fail() placeholder.
particle_constituent = fail()

# Past-participle verb constituent label.
past_participle_verb = equality_checker("VBN")

# Interjection constituent: no label configured, uses the fail() placeholder.
interjection = fail()

# NER annotated into the syntactic tree: no label configured, uses fail().
ner_constituent = fail()
コード例 #2
0
ファイル: constituent.py プロジェクト: josubg/CorefGraph
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, list_checker, matcher, fail

__author__ = 'Valeria Quochi <*****@*****.**>'
__date__ = '5/16/2013'

# Is a root constituent
root = list_checker(("root", "top", "ROOT", "TOP"))

# Is a clause
# NOTE(review): a bare string literal reading "Clause introduced by a
# (possibly empty) subordinating conjunction." used to sit directly under
# `root`.  It reads like a description of the SBAR label, not of the root, so
# it is kept here as a comment next to the clause labels instead.
clause = list_checker((
    "S",
    "SBAR",
))

# Is a Noun phrase
noun_phrase = equality_checker("NP")

# Is a Verb phrase
verb_phrase = equality_checker("VP")

# Particle constituents: no label configured, uses the fail() placeholder.
particle_constituents = fail()

# Is a past participle verb constituent
past_participle_verb = equality_checker("VBN")

# Interjections: no label configured, uses the fail() placeholder.
interjections = fail()

# Simple or subordinated clause: same label set as `clause`, so reuse it
# instead of building a second identical checker.
simple_or_sub_phrase = clause

# NER annotated into the tree: no label configured, uses fail().
ner_constituent = fail()

# Labels that can hold a mention (pattern starting with "NP").
mention_constituents = matcher("NP.*")

# NOTE(review): these labels differ from the NP/VP-style labels used above;
# confirm they are the intended head-rule labels for this tagset.
head_rules = list_checker(("SN", "SUJ", "GRUP.NOM"))
コード例 #3
0
ファイル: partofspeech.py プロジェクト: josubg/CorefGraph
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, matcher, fail

__author__ = ''


# Part-of-speech predicates.  Each name is a callable built by the project
# helpers from a tag pattern; patterns are written as regular expressions.

# Features questions
female = matcher(".*FEM.*")
# Was ".*MASC*": the trailing "C*" made the final C optional/repeatable, so
# the pattern only required "MAS".  It now mirrors the FEM pattern.
male = matcher(".*MASC.*")
neutral = fail()

singular = matcher(".*SING.*")
plural = matcher(".*PLUR.*")

# Animacy is not encoded in these tags; both use the fail() placeholder.
animate = fail()
inanimate = fail()

# Adjectives
adjective = matcher("^ADJ.*")


# Pronouns
pronoun = matcher("^PRON.*")
personal_pronoun = matcher("^PRON.*PRS.*")
relative_pronoun = matcher("^PRON.*REL.*")
interrogative_pronoun = matcher("^PRON.*INT.*")
# A pronoun can be a mention when it is relative or personal.
mention_pronoun = lambda x: relative_pronoun(x) or personal_pronoun(x)

# Nouns
singular_common_noun = matcher("^NOUN.*SING.*")
plural_common_noun = matcher("^NOUN.*PLUR.*")
コード例 #4
0
__author__ = 'Josu Bermudez <*****@*****.**>'

from corefgraph.resources.lambdas import list_checker, equality_checker, matcher, fail

# Label that marks "no named entity".
_no_ner = "O"


def no_ner(x):
    """Return True when *x* carries no entity: None, empty, or the "O" label."""
    return x is None or x == "" or x == _no_ner


def all(x):
    """Return True when *x* is any real entity label (the negation of no_ner).

    The name shadows the builtin on purpose: it is part of this module's
    public interface.
    """
    return not (x is None or x == "" or x == _no_ner)

# Classic 3 types useful in some cases
person = list_checker(("person", "per"))
organization = list_checker(("org", "organization"))
location = list_checker(("location", "loc"))
other = list_checker(("misc", "other"))

# Number features are not derived from NER labels; both use fail().
# Disabled candidate for singular: lambda x: all(x) and not organization(x)
# Disabled candidate for plural: organization
singular = fail()
plural = fail()

# Animacy features are not derived from NER labels; both use fail().
# Disabled candidate for animate: list_checker(("PERSON", "PER"))
# Disabled candidate for inanimate:
#   list_checker(("FACILITY", "FAC", "NORP", "LOCATION", "LOC", "PRODUCT",
#                 "EVENT", "ORGANIZATION", "ORG", "WORK OF ART", "LAW",
#                 "LANGUAGE", "DATE", "TIME", "PERCENT", "MONEY", "NUMBER",
#                 "QUANTITY", "ORDINAL", "CARDINAL", "MISC", "GPE", "WEA",
#                 "NML"))
animate = fail()
inanimate = fail()

# NE types that denotes mention
# A broader "any non-empty label" lambda used to be assigned to this name on
# the line just before the list below and was immediately overwritten by it,
# so it never took effect.  Only the effective definition is kept.
mention_ner = list_checker(
    ("person", "norp", "facility", "organization", "gpe", "nml", "location",
     "product", "event", "work of art", "law", "language", "date", "time"))

# NE types that must be filtered from mention candidates
コード例 #5
0
ファイル: pronouns.py プロジェクト: josubg/CorefGraph
# Indefinite pronoun forms.
# Two entries used to be written with surrounding spaces (' mateixes ',
# ' mateix '), so a bare token could never equal them; the padding is removed.
# Entries that were listed twice (ningú, altres, qualsevol) appear once.
indefinite = list_checker(
    ('quelcom', 'algú', 'alguna', 'algunes', 'algun', 'alguns', 'ambdues',
     'ambdós', 'bastant', 'bastants', 'qualssevol', 'qualsevol', 'altres',
     'massa', 'mitja', 'mitjans', 'mateixa', 'mateixes', 'mateix',
     'mateixos', 'molta', 'moltes', 'molt', 'molts', 'res', 'ningú', 'cap',
     'gens', 'ninguns', 'altre', 'poca', 'poques', 'poc', 'pocs', 'tantes',
     'tanta', 'tants', 'tant', 'totes', 'tota', 'tots', 'tot', 'unes', 'una',
     'uns', 'un', 'diverses', 'diversos'))

# Relative pronoun forms (from Freeling dict PR.*).
relative = list_checker((
    "on", "com", "qui", "quins", "quan", "quanta",
    "quantes", "quants", "que", "qui", "quins",
))

# Reflexive expression pattern.
# NOTE(review): the other lists in this section hold Catalan forms, while this
# pattern matches "mismo/misma(s)" -- confirm it is the intended one.
reflexive = matcher(r'^[^\s]* mism(o|a)s?$')

# No form configured; uses the fail() placeholder.
no_organization = fail()

# First person forms.
first_person = list_checker((
    "em", "meva", "meves", "me", "meu", "meu", "meus", "meues",
    "ens", "ens", "nosaltres", "nosaltres",
    "nostra", "nostres", "nostre", "jo",
))

# Second person forms.
second_person = list_checker((
    "us", "et", "tu", "teves", "teva", "teus", "teu", "teues",
    "vostès", "vostè", "vosaltres", "vós",
    "vostres", "vostra", "vostres", "vostre",
))

# Third person forms.
third_person = list_checker((
    "ell", "ella", "elles", "ells", "la", "les", "li", "els", "el",
    "seves", "seva", "seus", "seu", "seues",
))

# Pleonastic forms.
pleonastic = list_checker(("això", "_"))
コード例 #6
0
ファイル: partofspeech.py プロジェクト: josubg/CorefGraph
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, fail, matcher

__author__ = 'Valeria Quochi <*****@*****.**>'
__date__ = '5/16/2013'

# Part-of-speech predicates.  Patterns that carry regex syntax (anchors,
# wildcards) are compiled with matcher; they used to be handed to
# equality_checker in several places, where a tag would have had to equal the
# pattern text itself, "^" and ".*" included.

# features questions
female = fail()
male = fail()
neutral = fail()
# NOTE(review): the other tags in this file use "~" as separator (NOU~PR,
# PRO~PE) while these use "_" -- confirm the separator against the tagset.
singular = matcher("^NOU_CS")
plural = matcher("^NOU_CP")
animate = fail()
inanimate = fail()

# Adjectives
adjective = matcher("^ADJ.*")

# pronouns
personal_pronoun = matcher("^PRO~PE")
relative_pronoun = matcher("^PRO~RE")
pronoun = matcher("^PRO")
# A pronoun can be a mention when it is relative or personal.
mention_pronoun = lambda x: relative_pronoun(x) or personal_pronoun(x)

# nouns (same separator caveat as for singular/plural above)
singular_common_noun = matcher("^NOU_CS")
plural_common_noun = matcher("^NOU_CP")
proper_noun = matcher("^NOU~PR")
noun = matcher("^NOU.*")

# verbs
verbs = matcher("^V.*")
modals = matcher("^VMO.*")
コード例 #7
0
ファイル: constituent.py プロジェクト: josubg/CorefGraph
# Constituent-label predicates built with the project helpers.

# Labels accepted as the root of a tree.
root = list_checker(("root", "top", "ROOT", "TOP"))

# Labels accepted as a clause.
clause = list_checker(("S", "SENTENCE"))

# A simple or subordinated clause is recognised with the clause predicate.
simple_or_sub_phrase = clause

# Labels accepted as a noun phrase.
noun_phrase = list_checker(("SN", "GRUP.NOM", "SUJ"))

# Labels accepted as a verb phrase.
verb_phrase = list_checker(("GRUP.VERB",))

# Labels accepted as a direct complement.
complement_direct = list_checker(("CD",))

# Particle constituent: no label configured, uses the fail() placeholder.
particle_constituent = fail()

# Past-participle verb constituent: no label configured, uses fail().
past_participle_verb = fail()

# Label of an interjection constituent.
interjection = equality_checker("INTERJECCIÓ")

# NER annotated into the tree: no label configured, uses fail().
ner_constituent = fail()


# TODO Remove this check
def mention_constituents(x):
    """Accept a label that is a noun phrase or a direct complement."""
    return noun_phrase(x) or complement_direct(x)
コード例 #8
0
ファイル: namedentities.py プロジェクト: josubg/CorefGraph
# coding=utf-8
""" Named entity labels used in Semeval 2010.

"""
from corefgraph.resources.lambdas import list_checker, fail

__author__ = 'Josu Bermudez <*****@*****.**>'

def all(x):
    """Return True when *x* is a real entity label: not "O", None or empty.

    The name shadows the builtin on purpose: it is part of this module's
    public interface.
    """
    return x != "O" and x is not None and x != ""

# The three classic entity types, each with the label spellings it accepts.
person = list_checker(("PERSON", "PER", "person"))
organization = list_checker(("ORG", "ORGANIZATION", "org"))
location = list_checker(("LOCATION", "LOC", "loc"))
# Disabled: other = list_checker(("MISC", "OTHER", "other"))


def singular(x):
    """Accept any entity label that is not an organization label."""
    return all(x) and not organization(x)


# No label is treated as plural; uses the fail() placeholder.
plural = fail()

# Animacy reuses the type predicates: person labels are animate, location
# labels are inanimate.
animate = person
inanimate = location

# NE types that denote a mention: every real entity label.
mention_ner = all

# NE types that must be filtered from mention candidates
def no_mention_ner(x=None):
    """Return False for every label: no NE type is filtered out.

    The previous zero-argument lambda raised TypeError when called with a
    label, unlike every other predicate in this module.  The argument is
    optional so an argument-less call keeps working.
    """
    return False
コード例 #9
0
ファイル: stopwords.py プロジェクト: josubg/CorefGraph
# Word-list predicates.  Very basic stop word list.
stop_words = list_checker((
    "a", "ad", "ai", "al", "alla", "allo",
    "con", "cosi'", "così",
    "da", "del", "della", "dello", "dentro", "di",
    "e", "ecco", "ed",
    "fra", "fuori",
    "ha", "hai", "hanno", "ho",
    "il", "in",
    "nei", "nella",
    "o",
    "per",
    "qua'", "quello", "questo", "qui", "quindi", "quà",
    "sopra", "sotto", "su", "sul", "sulla",
    "tra",
    "un", "una", "uno",
))

# Extended stop word list.
extended_stop_words = list_checker(("ad", "ad"))

# Filler sounds that are not words.
non_words = list_checker((
    "mm", "hmm", "ahm", "uhm", "ehm",
    "ah", "eh", "oh", "uh", "ih",
))

# No entry configured; uses the fail() placeholder.
unreliable = fail()

# TODO. Re-Check. not sure of what should go here
invalid_stop_words = list_checker((
    "c'è", "c'e'", "spa", "s.p.a.", "s.r.l.", "ecc", "etc",
))

# Words that invalidate a candidate when it starts with them.
invalid_start_words = list_checker(("'s", "etc"))

# Words that invalidate a candidate when it ends with them.
invalid_end_words = list_checker(("etc",))

# Modifiers of location names.
location_modifiers = list_checker((
    "est", "ovest", "nord", "sud",
    "est", "ovest", "nord", "sud",
    "superiore", "inferiore",
))

# TODO: no suffix configured yet; uses the fail() placeholder.
common_NE_subfixes = fail()

# Token that opens quoted speech.
speaking_begin = list_checker(("``",))
コード例 #10
0
ファイル: partofspeech.py プロジェクト: josubg/CorefGraph
# coding=utf-8
from corefgraph.resources.lambdas import equality_checker, matcher, fail

__author__ = 'Josu Bermudez <*****@*****.**>'


# Part-of-speech predicates over positional tags: each pattern selects tags
# by the characters found at fixed positions.

# Gender
female = matcher(r"^[ADP]..F|^N.F|^V.....F")
male = matcher(r"^[ADPS]..M|^N.M|^V.....M")
neutral = matcher(r"^[ADP]..N")

# Number
singular = matcher(r"^[ADPS]...S|^N..S|^V....S")
plural = matcher(r"^[ADPS]...P|^N..P|^V....P")

# Animacy is not encoded in these tags; both use the fail() placeholder.
animate = fail()
inanimate = fail()

# Adjectives
adjective = matcher(r"^A")

# Pronouns
pronoun = matcher(r"^D?P")
personal_pronoun = matcher(r"^PP")
relative_pronoun = matcher(r"^PR")
interrogative_pronoun = matcher(r"^PT")
# Pronoun tags that can be a mention.
mention_pronoun = matcher(r"P[PXRL]|^DP")

# Common nouns, by number
singular_common_noun = matcher(r"^NC.S")
plural_common_noun = matcher(r"^NC.P")
コード例 #11
0
ファイル: partofspeech_old.py プロジェクト: josubg/CorefGraph
adjective_qualif = "ADJ~QU"
adjective_ord = "ADJ~OR"
adjective_indef = "ADJ~IN"
adjective_dem = "ADJ~DE"
adjective_poss = "ADJ~PO"
adjective_deitt = "ADJ~DI"
adjective_interr = "ADJ~IR"
adjective_excl = "ADJ~EX"
"""

# Conjunction tag.
_conjunctions = equality_checker("CONJ")

# Usable functions

# Feature questions: none is answered from the tag here; every predicate
# uses the fail() placeholder.
female = fail()
male = fail()
neutral = fail()
singular = fail()
plural = fail()
animate = fail()
inanimate = fail()

# Adjectives: plain, comparative and superlative tags.  The underscore names
# are tag constants defined elsewhere in this module.
adjective = list_checker((
    _adjective,
    _adjective_comparative,
    _adjective_superlative,
))

# Pronouns
personal_pronoun = list_checker((
    _personal_pronoun,
    _possessive_pronoun,
))
relative_pronoun = list_checker((
    _wh_pronoun,
    _wh_possessive_pronoun,
))
pronoun = list_checker((_personal_pronoun, _possessive_pronoun, _wh_pronoun,
コード例 #12
0
ファイル: partofspeech.py プロジェクト: josubg/CorefGraph
# Building-block predicates over the tag's feature string.
_pronouns = matcher(r"pos=p")
_possessive = matcher(r".*postype=possessive")
_relative = matcher(r".*postype=relative")
_personal = matcher(r".*postype=personal")


def _personal_pronouns(x):
    """Accept a pronoun tag whose postype is personal."""
    return _pronouns(x) and _personal(x)


# Feature questions
male = matcher(r".*gen=m")
female = matcher(r".*gen=f")
neutral = matcher(r".*gen=n")

singular = matcher(r".*num=s")
plural = matcher(r".*num=p")

# Animacy is not encoded in these tags; both use the fail() placeholder.
animate = fail()
inanimate = fail()

# Adjectives
adjectives = matcher(r"pos=a")

# Determinant
_possessive_determinant = matcher(".*postype=possessive")

# Pronouns
# TODO Assure broad usage of these tags


def pronouns(x):
    """Accept a pronoun tag or a possessive determinant tag."""
    return _pronouns(x) or _possessive_determinant(x)


def possessive_pronouns(x):
    """Accept a pronoun tag whose postype is possessive."""
    return _pronouns(x) and _possessive(x)


def relative_pronoun(x):
    """Accept a pronoun tag whose postype is relative."""
    return _pronouns(x) and _relative(x)
mention_pronouns = lambda x: _personal_pronouns(x) or possessive_pronouns(x) or _possessive_determinant(x) \