Ejemplo n.º 1
0
def feature_structures():
    """Demonstrate basic nltk.FeatStruct construction, indexing and
    re-entrant (shared) values.

    BUGFIX: the original used Python 2 print statements, which are a
    syntax error under Python 3; the rest of this file already uses the
    print() function.
    """
    fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
    print("fs1=", fs1)
    print("fs1[TENSE]=", fs1['TENSE'])
    # FeatStructs behave like dicts: features can be added after creation.
    fs1['CASE'] = 'acc'
    fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
    print("fs2=", fs2)
    person = nltk.FeatStruct(name='Lee', telno='212 444 1212', age=33)
    print("person=", person)
    # (1)/->(1) is NLTK's re-entrancy notation: SPOUSE.ADDRESS refers to
    # the same structure as the top-level ADDRESS.
    print(nltk.FeatStruct("""
  [NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
  SPOUSE=[Name='Kim', ADDRESS->(1)]]
  """))
Ejemplo n.º 2
0
def fuf_file_to_featstruct(fuf_filename):
    """
    Convert a FUF grammar file into an C{nltk.FeatStruct}, processing any
    type definitions found along the way.

    @param fuf_filename: The name of the file that contains the grammar
    @type fuf_filename: string
    @return: The type table (C{fstypes.FeatureTypeTable}) and the grammar
    as a C{nltk.featstruct.FeatStruct}.
    """
    # Parse the FUF source into a list of s-expressions.
    sexp_list = SexpFileParser(fuf_filename).parse()
    assert sexp_list

    type_table = FeatureTypeTable()
    grammar_fs = nltk.FeatStruct()

    # Each top-level sexp is either a type definition or the grammar itself.
    for expr in sexp_list:
        head = expr[0]
        if isinstance(head, basestring) and head == 'define-feature-type':
            # (define-feature-type name children)
            assert len(expr) == 3
            type_table.define_type(expr[1], expr[2])
        else:
            # Assume this is the feature-structure definition; nothing
            # meaningful may follow it, so stop here.
            grammar_fs = _convert_fuf_featstruct(expr)
            break
    return type_table, grammar_fs
Ejemplo n.º 3
0
def _convert_fuf_featstruct(sexp):
    """Convert a parenthesized FUF s-expression into an nltk.FeatStruct."""
    assert sexp.lparen == '('
    result = nltk.FeatStruct()
    for item in sexp:
        if isinstance(item, basestring):
            # A bare string child means this sexp is itself a single
            # (feature value) pair rather than a list of pairs.
            name, value = _convert_fuf_feature(sexp)
            result[name] = value
            break
        name, value = _convert_fuf_feature(item)
        result[name] = value
    return result
Ejemplo n.º 4
0
    def _get_value(self, fs, path):
        """
        Find and return the value within the feature structure
        given a path.

        Walks ``path`` inside ``fs``, creating empty sub-structures for
        missing intermediate keys and resolving any C{ReentranceLink}
        encountered along the way.  If the final key is absent, a fresh
        unification variable is stored there and returned.

        @param fs: Feature structure
        @type fs: C{nltk.featstruct.FeatStruct}
        @param path: list of keys to follow
        @type path: list
        @return: the feature value at the end of the path
        """
        # NOTE(review): `target` is assigned but never used below.
        target = None

        # in case we find another link, keep every structure visited so a
        # ReentranceLink can climb back up the chain via `ancestors`
        ancestors = [fs]

        # walk every step except the last; the final key is handled
        # separately after the loop
        last_step = path[-1]
        path = path[:-1]

        for step in path:
            if step in fs and not isinstance(fs[step], ReentranceLink):
                fs = fs[step]
                ancestors.append(fs)
            elif step not in fs:
                # missing intermediate node: create it on the fly
                fs[step] = nltk.FeatStruct()
                fs = fs[step]
                ancestors.append(fs)
            elif isinstance(fs[step], ReentranceLink):
                # climb `up` levels through the ancestors, then resolve the
                # link's own `down` path recursively and splice the result in.
                # NOTE(review): the resolved structure is NOT appended to
                # `ancestors` here -- confirm that is intentional.
                parent = ancestors[-1 * fs[step].up]
                new_path = fs[step].down
                fs[step] = self._get_value(parent, new_path)
                fs = fs[step]

        # A link may have resolved directly to a variable; return it as-is.
        if isinstance(fs, nltk.sem.Variable):
            return fs

        if last_step in fs:
            assert (not isinstance(fs[last_step], ReentranceLink))
            return fs[last_step]

        # All the way through the path but the value doesn't exist:
        # create a fresh unification variable in its place
        fs[last_step] = self._unique_var()
        return fs[last_step]
Ejemplo n.º 5
0
def parseFoma(sentence):
    """Morphologically analyse *sentence* with the foma FST, build lexical
    rules from the analyses, and chart-parse it with the feature grammar,
    printing each resulting tree (or '* sentence' when no parse exists)."""
    words = sentence.split()

    analyses = {}
    lexical_rules = []

    for position, word in enumerate(words):
        categories = []
        for raw in fst.apply_up(str.encode(word)):
            decoded = raw.decode('utf8')
            parts = decoded.split('+')
            print(decoded)

            # First element is the lemma, second the category; the rest
            # are morphological features to fold into the token FeatStruct.
            token_fs = nltk.FeatStruct("[PRED=" + parts[0] + "]")
            category = parts[1]

            for feature in tuple(parts[2:]):
                mapped = feat2LFG(feature)
                unified = token_fs.unify(mapped)
                if unified:
                    token_fs = unified
                else:
                    print("Error unifying:", token_fs, mapped)

            flat = flatFStructure(token_fs)
            categories.append(category + flat)
            lexical_rules.append(category + flat + " -> " + "'" + word + "'")

        analyses[position] = categories

    # Extend the base grammar with the freshly generated lexical rules.
    extended_grammar = grammarText + "\n" + "\n".join(lexical_rules)

    parser = nltk.parse.FeatureChartParser(FeatureGrammar.fromstring(extended_grammar))
    trees = list(parser.parse(words))
    if trees:
        for tree in trees:
            print(tree)
    else:
        print("*", sentence)
Ejemplo n.º 6
0
def feature_structure_unification():
    """Demonstrate unification of feature structures, including shared
    (re-entrant) values and variables.

    BUGFIX: the original used Python 2 print statements, which are a
    syntax error under Python 3; the rest of this file already uses the
    print() function.
    """
    # NOTE(review): 'STREE' looks like a typo for 'STREET'; kept verbatim
    # so the demo output is unchanged.
    fs1 = nltk.FeatStruct(NUMBER=74, STREE='rue Pascal')
    fs2 = nltk.FeatStruct(CITY='Paris')
    print(fs1.unify(fs2))
    # result of unification if fs1 subsumes fs2 or vice versa, the more
    # specific of the two.
    fs0 = nltk.FeatStruct("""
    [NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
    SPOUSE=[Name='Kim', ADDRESS->(1)]]
  """)
    print("fs0=", fs0)
    fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]")
    print(fs1.unify(fs0))
    print("fs1=", fs1)
    # NOTE(review): 'ADRRESS' below looks like a typo for 'ADDRESS';
    # also kept verbatim.
    fs2 = nltk.FeatStruct("""
    [NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
    SPOUSE=[NAME=Kim, ADRRESS->(1)]]
  """)
    print("fs1.unify(fs2)=", fs1.unify(fs2))
    # ?x is a variable: unification binds ADDRESS and ADDRESS2 together.
    fs3 = nltk.FeatStruct("[ADDRESS=?x, ADDRESS2=?x]")
    print("fs2.unify(fs3)=", fs2.unify(fs3))
Ejemplo n.º 7
0
def _convert_fuf_feature(sexp):
    """
    Convert a single FUF (feature value) s-expression into a
    (feature-name, value) pair suitable for an nltk.FeatStruct.

    Handles the special FUF constructs: C{alt}, C{opt}, C{===} triples,
    C{~} list values, C{pattern}/C{cset} value tuples, and C{{...}}
    pointers (which become C{ReentranceLink}s, resolved later).

    @param sexp: the s-expression to convert
    @type sexp: C{SexpList}
    @return: a (feature, value) tuple
    """
    assert sexp.lparen == '(', sexp
    feat, name, index, val = ('', '', '', '')

    # Special handling for the alt feature
    if sexp[0] == 'alt':
        feat, name, index, val = parse_alt(sexp)
    elif sexp[0] == 'opt':
        feat, name, index, val = parse_opt(sexp)
    elif len(sexp) == 3 and sexp[1] == '===':
        feat, val = _convert_triple_eq(sexp)
    elif len(sexp) == 3 and sexp[1] == '~':
        # (feat ~ lst): drop the '~' marker and convert the list value.
        del sexp[1]
        sexp[1] = _list_convert(sexp[1])
        print(sexp[1])
        feat, val = sexp
    else:
        assert len(sexp) == 2, sexp[1]
        assert isinstance(sexp[0], basestring), sexp
        feat, val = sexp

    # Special handling for pattern feature
    if feat in ('pattern', 'cset'):
        assert isinstance(val, SexpList) and val.lparen == '('
        return feat, nltk.FeatureValueTuple(val)

    # Special handling of the alt feature
    if feat == 'alt':
        assert isinstance(val, SexpList) and val.lparen == '('
        choices = []
        for c in val:
            if isinstance(c, basestring):
                choices.append(c)
            else:
                choices.append(_convert_fuf_featstruct(c))
        # BUGFIX: this FeatStruct was previously rebuilt inside the loop on
        # every iteration (indentation bug); build it once after all choices
        # are collected.  Guarded so an empty alt keeps its original
        # SexpList value, exactly as before.
        if choices:
            val = nltk.FeatStruct(
                dict([('%d' % (i + 1), choice)
                      for i, choice in enumerate(choices)]))
        # Process the alt with a name
        if len(name) > 0:
            return "%s_%s" % (feat, name), val

        # there is an index defined on this alt
        if isinstance(index, SexpList):
            ifs = _convert_fuf_featstruct(index)
            val["_index_"] = ifs[":index"]
        return feat, val

    if isinstance(val, SexpList):
        # If value is a feature structure, then recurse.
        if val.lparen == '(':
            return feat, _convert_fuf_featstruct(val)
        # If value is a pointer, then do something.
        if val.lparen == '{':
            # We'll resolve this later, using _resolve_fs_links():
            return feat, ReentranceLink(val)
        else:
            assert False, 'unexpected sexp type'

    # Otherwise, return the value as a string.
    return feat, val
Ejemplo n.º 8
0
def feat2LFG(f):
    """Map an atomic morphological feature *f* to a one-entry LFG
    FeatStruct; unknown features yield the empty structure "[]"."""
    mapped = featureMapping.get(f, "")
    return nltk.FeatStruct("".join(("[", mapped, "]")))
Ejemplo n.º 9
0
tokens = 'Kim likes children'.split()
from nltk import load_parser

# trace=2 prints each chart-parser step while parsing the sentence.
cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
for tree in cp.parse(tokens):
    print(tree)

# 9.1.3 Terminology
# Simple values are usually called atoms.
#   Boolean values are a special case of atomic values.
# AGR is a complex value.
# Attribute-Value Matrix (AVM).

# 9.2 Processing feature structures
# Building feature structures; the unification operation between two
# different feature structures.
fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
print(fs1)

fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
print(fs1['GND'])
fs1['CASE'] = 'acc'
print(fs1)

fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
print(fs2)
print(fs2['AGR'])

print(nltk.FeatStruct("[POS='N',AGR=[PER=3, NUM='pl', GND='fem']]"))

# Feature structures can also be used to represent other kinds of data.
print(nltk.FeatStruct(NAME='Lee', TELNO='13918181818', AGE=33))
Ejemplo n.º 10
0
# Lexical entry for the verb 'surprise' as a plain feature dictionary.
surprise = {
    'CAT': 'V',
    'ORTH': 'surprised',
    'REL': 'surprise',
    'SRC': 'sbj',
    'EXP': 'obj'
}  # SRC = source; EXP = experiencer

nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
tokens = 'Kim likes children'.split()
from nltk import load_parser
cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
trees = cp.parse(tokens)
for tree in trees:
    tree.draw()

# BUGFIX: the statements below were Python 2 print statements, a syntax
# error under Python 3, while this same snippet already uses print()
# above -- converted to the print() function.
fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
print(fs1)
print(fs1['TENSE'])
print(nltk.FeatStruct("[POS='N', AGR=[PER=3, NUM='pl', GND='fem']]"))

print(nltk.FeatStruct(
    """[NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],SPOUSE=[NAME='Kim', ADDRESS->(1)]]"""
))

fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
fs2 = nltk.FeatStruct(CITY='Paris')
print(fs1.unify(fs2))

# More codes and details on http://www.nltk.org/book_1ed/ch09.html
Ejemplo n.º 11
0
    for subtree in tree.subtree(filter)
]

# Grammar is both the hard part and the focus here.
# Chapter 9 continues with grammars; further reading is needed.

# 9-1. Example of a feature-based grammar
nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')

tokens = 'Kim likes children'.split()
from nltk import load_parser

cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
trees = cp.parse_one(tokens)

fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
print(fs1)

fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
print(fs2['AGR']['PER'])
# Feature structures can be viewed as directed acyclic graphs (DAGs).
# When two paths share the same value they are said to be equivalent.
# To express re-entrancy in the matrix notation, the first occurrence of
# a shared structure is prefixed with a bracketed number such as (1);
# any later reference to it uses ->(1), as shown below.
# BUGFIX: converted the Python 2 print statement (the snippet already
# uses print() above) and removed a stray '... ' doctest-continuation
# marker that had been copied into the string literal, which would make
# FeatStruct parsing fail.
print(nltk.FeatStruct(
    """[NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
SPOUSE=[NAME='Kim', ADDRESS->(1)]]"""))
Ejemplo n.º 12
0
import nltk
from nltk import load_parser
"""
http://www.shareditor.com/blogshow?blogId=71
7. 文法分析还是基于特征好啊
"""

# Grammar analysis is about analysing the ordering of words.

# Grammatical feature constraints: agreement, attributes, constraints,
# terminology.

# Feature structures
fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
print(fs1)
fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
print(fs2)

# Parse a query with the provided feature grammar
cp = load_parser('/Users/xingoo/nltk_data/grammars/book_grammars/sql0.fcfg')
query = 'What cities are located in China'
for tree in cp.parse(nltk.word_tokenize(query)):
    print(tree)
Ejemplo n.º 13
0
import nltk
from nltk import load_parser

# Feature-based grammar
nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')

# Trace the feature-based chart parser
tokens = 'Kim likes children'.split()
cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
for tree in cp.parse(tokens):
    print(tree)

# Building feature structures
fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
print(fs1)
fs1['CASE'] = 'acc'
fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
print(fs2)

print(nltk.FeatStruct("[POS='N', AGR=[PER=3, NUM='pl', GND='fem']]"))
print(nltk.FeatStruct(NAME='Lee', TELNO='01 27 86 42 96', AGE=33))

# Structure sharing (re-entrancy)
print(
    nltk.FeatStruct("""[NAME='Lee', 
                          ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'], 
                          SPOUSE=[NAME='Kim', ADDRESS->(1)]]"""))
print(nltk.FeatStruct("[A='a', B=(1)[C='c'], D->(1), E->(1)]"))

# Unification
fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
Ejemplo n.º 14
0
Archivo: 75.py Proyecto: jfilter/nlp
import nltk

# A collection of feature structures used to illustrate unification.
fs1 = nltk.FeatStruct("[A = ?x, B= [C = ?x]]")
fs2 = nltk.FeatStruct("[B = [D = d]]")
fs3 = nltk.FeatStruct("[B = [C = d]]")
fs4 = nltk.FeatStruct("[A = (1)[B = b], C->(1)]")
fs5 = nltk.FeatStruct("[A = (1)[D = ?x], C = [E -> (1), F = ?x] ]")
fs6 = nltk.FeatStruct("[A = [D = d]]")
fs7 = nltk.FeatStruct("[A = [D = d], C = [F = [D = d]]]")
fs8 = nltk.FeatStruct("[A = (1)[D = ?x, G = ?x], C = [B = ?x, E -> (1)] ]")
fs9 = nltk.FeatStruct("[A = [B = b], C = [E = [G = e]]]")
fs10 = nltk.FeatStruct("[A = (1)[B = b], C -> (1)]")

# Each entry: (label, left operand, right operand, hand-written expected
# result, or None when no expected value is printed).
unification_demos = [
    ('f1 and f2', fs1, fs2, "[A = ?x, B= [C = ?x, D = d]]"),
    ('f1 and f3', fs1, fs3, "[A = d, B= [C = d]]"),
    ('f4 and f5', fs4, fs5,
     "[A = (1)[B = b, D = ?x, E -> (1), F = ?x], C->(1)]"),
    ('f5 and f6', fs5, fs6, None),
]

for label, left, right, expected in unification_demos:
    print(label)
    print(left.unify(right))
    print()
    if expected is not None:
        print(nltk.FeatStruct(expected))
# Processing feature structures
# BUGFIX: this snippet mixed Python 2 print statements with the print()
# function (used at the end); all prints converted to print() so the
# snippet is valid Python 3.

import nltk
from nltk import load_parser
# In NLTK we define feature structures as follows.
fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')


# We can view the structures as a kind of dictionary,
# so we can access values by indexing in the usual way.
print(fs1['GND'])

# Adding to the feature structure.
fs1['CASE'] = 'acc'

# We can also build more complex feature structures as follows.
fs2 = nltk.FeatStruct(POS='N', ARG=fs1)
print(fs2)
print(fs2['ARG'])
print(fs2['ARG']['PER'])

# Feature structures are general-purpose structures,
# so we don't have to use them only for linguistic features.
print(nltk.FeatStruct(NAME='Lee', TELNO='01 27 86 42 96', AGE=33))

'''
    We can think of feature structures as graphs
    more specifically.

    Directed acyclic graphs (DAGs)