Beispiel #1
0
def operator_precedence_features():
    """Collect operator-precedence features from parsed Turk explanations.

    Reads 'exp_data_turk_new.json', CCG-parses each explanation with a
    dynamically built lexicon, and prints the distinct features (ordered by
    frequency), the number of distinct features, and how many explanations
    failed to process out of the total.
    """
    features = []
    pass_num = 0  # number of explanations that failed anywhere in the pipeline

    with open('exp_data_turk_new.json', "r") as f:
        exps = json.load(f)
    # Progress checkpoints at the 1/4, 1/2 and 3/4 marks (hoisted out of loop).
    checkpoints = {len(exps) // 4, len(exps) // 2, len(exps) // 4 * 3}
    for exp_num in tqdm(range(len(exps))):
        exp = exps[exp_num]
        if exp_num in checkpoints:
            print(len(features))
        try:
            e = exp['exp']
            new_sent = pre_process_sent(e)
            sent_tokenized = print_tokenized(new_sent)
            quote_words = new_predicate(sent_tokenized)
            raw_lexicon = add_new_predicate(quote_words)
            lex = lexicon.fromstring(raw_lexicon, True)
            parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
            for sent in sent_tokenized:
                for parse in parser.parse(sent):
                    sem = parse_sem(str(parse.label()[0].semantics()))
                    # parse_sem signals failure with False; identity check
                    # avoids accidentally dropping falsy-but-valid results.
                    if sem is not False:
                        collect_features(sem, features)
        except Exception:
            # Best-effort: skip explanations that fail, but keep count.
            pass_num += 1
    counter = Counter(features)
    # Distinct features in most-common-first order (safe on an empty counter,
    # unlike the original zip(*...) unpacking).
    prt = [feat for feat, _ in counter.most_common()]
    print(prt)
    print(len(prt))
    print(pass_num, len(exps))
Beispiel #2
0
def speech_reco_core():
    """Listen for one spoken command, CCG-parse it, and build an array_msg.

    Returns:
        array_msg populated from the parsed semantics, or None when the
        speech could not be recognized, the service failed, or the
        utterance produced no usable parse.  (The original left `robotmove`
        unbound on those paths and raised NameError at the return.)
    """
    with open('/home/crazykoe/turtlebotws/lexicon.txt', 'r') as file:
        myLexicon = file.read()
    lex = lexicon.fromstring(myLexicon, True)
    parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)

    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("What do you need?")
        audio = r.listen(source)

    try:
        # Recognize once and reuse the text; the original called the online
        # recognizer a second time outside the try, unguarded.
        request = r.recognize_google(audio)
    except sr.UnknownValueError:
        print("Please say it again.")
        return None
    except sr.RequestError as e:
        # Original format string had no placeholder, so the error was lost.
        print("The service is down: {}".format(e))
        return None
    print("I think you said " + request + ". Got it!")

    # NOTE(review): the original encoded to UTF-8 bytes (Python 2 idiom);
    # on Python 3 the parser needs str tokens, so the text is used directly.
    robotmove = None
    parses = list(parser.parse(request.lower().split()))
    if len(parses) != 0:
        (token, op) = parses[0][()].label()
        if token.semantics() is not None:
            output = str(token.semantics())
            match = re.findall(
                r"(?:action\((\w+)\) & target\((\w+)(?:\((\w+)\))?\)(?: &)?)+",
                output)
            if len(match) >= 1:
                robotmove = array_msg()
                robotmove.action = match[0][0]
                robotmove.target = match[0][1]
                robotmove.name = match[0][2]
                if len(match) >= 2:
                    # Compound command: second action/target pair present.
                    robotmove.cmdaction = match[1][0]
                    robotmove.targetroom = match[1][1]
                    robotmove.names = match[1][2]
                else:
                    robotmove.cmdaction = ''
                    robotmove.targetroom = ''
                    robotmove.names = ''
    else:
        print('Unable to parse')
    return robotmove
Beispiel #3
0
def rule_features():
    """Dump the combinator rules used across all parsed explanations.

    Reads 'exp_data_new.json', CCG-parses the first tokenized sentence of
    each explanation, collects rule information via recurse_print, and
    writes the accumulated list to 'combrules.json'.
    """
    lis = []
    pass_num = 0  # explanations skipped because a pipeline step failed

    with open('exp_data_new.json', "r") as f:
        exps = json.load(f)
    for exp_num in tqdm(range(len(exps))):
        exp = exps[exp_num]
        try:
            e = exp['exp']
            new_sent = pre_process_sent(e)
            sent_tokenized = print_tokenized(new_sent)
            raw_lexicon = add_new_predicate(sent_tokenized)
            lex = lexicon.fromstring(raw_lexicon, True)
            parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
            for parse in parser.parse(sent_tokenized[0].split()):
                recurse_print(parse, lis)
        except Exception:
            # Best-effort: skip unparseable explanations but keep count.
            # (The original had a redundant `pass` after the increment.)
            pass_num += 1
    with open('combrules.json', "w") as f:
        json.dump(lis, f)
Beispiel #4
0
# Construct the lexicon
# NOTE(review): `fromstring` is presumably nltk.ccg.lexicon.fromstring
# imported elsewhere — confirm against the file's imports.  Without
# include_semantics, entries carry categories only (no lambda terms).
lex = fromstring("""
    :- S, NP, N, VP    # Primitive categories, S is the target primitive

    Det :: NP/N         # Family of words
    Pro :: NP
    TV :: VP/NP
    Modal :: (S\\NP)/VP # Backslashes need to be escaped

    I => Pro             # Word -> Category mapping
    you => Pro

    the => Det

    # Variables have the special keyword 'var'
    # '.' prevents permutation
    # ',' prevents composition
    and => var\\.,var/.,var

    which => (N\\N)/(S/NP)

    will => Modal # Categories can be either explicit, or families.
    might => Modal

    cook => TV
    eat => TV

    mushrooms => N
    parsnips => N
    bacon => N
    """)
Beispiel #5
0
from nltk.ccg import chart, lexicon
from nltk.ccg.chart import printCCGDerivation

print('''==============================
===       Lexicon l3       ===
==============================
''')
# Raw string: the semantic terms use backslash-lambda (\P, \Y, \z, ...),
# which in a non-raw literal are invalid escape sequences and trigger
# SyntaxWarning/DeprecationWarning on modern Python.  The raw prefix is
# value-identical here because none of the sequences are recognized escapes.
l3 = lexicon.fromstring(
    r'''
:- S, NP
Justin => NP {\P.P(j)}
Keisha => NP {\P.P(k)}
somebody => NP {\P.exists x.(person(x) & P(x))}
everybody => NP {\P.forall x.(person(x) -> P(x))}
admires => (S\\NP)/NP {\Y.(\Z.Z(\z.Y(\y.admire(z,y))))}
complains => S\\NP {complain}
''', True)

print(l3)
print()

print(
    '''====================================================================================
=== Derivation for \'somebody admires everybody\' obtained with ApplicationRuleSet ===
=== The semantics is the expected one.                                           ===
===================================================================================='''
)

# Application-only rule set: function application alone yields the expected
# quantifier scoping for this sentence.
parser1 = chart.CCGChartParser(l3, chart.ApplicationRuleSet)
parses = list(parser1.parse("somebody admires everybody".split()))
printCCGDerivation(parses[0])
Beispiel #6
0
# Construct the lexicon
# NOTE(review): duplicate of an earlier example; `fromstring` is presumably
# nltk.ccg.lexicon.fromstring imported elsewhere — confirm.  Entries carry
# categories only (no semantics flag is passed).
lex = fromstring('''
    :- S, NP, N, VP    # Primitive categories, S is the target primitive

    Det :: NP/N         # Family of words
    Pro :: NP
    TV :: VP/NP
    Modal :: (S\\NP)/VP # Backslashes need to be escaped

    I => Pro             # Word -> Category mapping
    you => Pro

    the => Det

    # Variables have the special keyword 'var'
    # '.' prevents permutation
    # ',' prevents composition
    and => var\\.,var/.,var

    which => (N\\N)/(S/NP)

    will => Modal # Categories can be either explicit, or families.
    might => Modal

    cook => TV
    eat => TV

    mushrooms => N
    parsnips => N
    bacon => N
    ''')
Beispiel #7
0
def parse_tokens(one_sent_tokenize, raw_lexicon):
    """
        CYK algorithm for parsing a tokenized sentence into a parse tree. We implement our own, as solely
        using NLTK's CCGChartParser and the grammar we came up won't allow for the parses we desired. As
        we are not linguists, we found it easier to change the code than figure out possible problems with
        our grammar.

        Outputs the last row of the CYK datastructure as possible parses for the sentence
            * Each element in the row is string version of nltk.tree.Tree (sort of, we actually construct our
              own tree based on the tree provided by NLTK)

        Arguments:
            one_sent_tokenize (arr) : array of string tokens representing a sentence
            raw_lexicon       (str) : string representation of lexicon (grammar and vocabulary rep of a language)

        Returns:
            (arr) : list of possible parses (semantics strings), or [] when
                    anything in the pipeline fails
    """
    try:
        beam_lexicon = copy.deepcopy(raw_lexicon)
        # Row 0 of the CYK table: each cell holds just its literal token.
        CYK_form = [[[token] for token in one_sent_tokenize]]
        CYK_sem = [[]]
        for layer in range(1, len(one_sent_tokenize)):
            layer_form = []
            layer_sem = []
            # Rebuild the parser every layer so that entries appended to the
            # beam lexicon in previous layers become parseable "words".
            lex = lexicon.fromstring(beam_lexicon, True)
            parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
            for col in range(0, len(one_sent_tokenize) - layer):
                form = []
                sem_temp = []  # dedupe key: "<semantics>_<category>"
                word_index = 0
                st = col
                ed = st + layer
                # Try every split point of the span [st, ed].
                for splt in range(st, ed):
                    words_L = CYK_form[splt - st][st]
                    words_R = CYK_form[ed - splt - 1][splt + 1]
                    for word_0 in words_L:
                        for word_1 in words_R:
                            try:
                                for parse in parser.parse([word_0, word_1]):
                                    (token, op) = parse.label()
                                    categ = token.categ()
                                    sem = token.semantics()
                                    word_name = '$Layer{}_Horizon{}_{}'.format(
                                        str(layer), str(col), str(word_index))
                                    word_index += 1
                                    # New lexicon entry naming this sub-parse.
                                    entry = "\n\t\t" + word_name + ' => ' + str(categ) + " {" + str(sem) + "}"
                                    if str(sem) + '_' + str(categ) not in sem_temp:
                                        form.append((parse, word_name, entry, str(sem)))
                                        sem_temp.append(str(sem) + '_' + str(categ))
                            except Exception:
                                # This pair has no parse; try the next one.
                                pass
                add_form = []
                for elem in form:
                    parse, word_name, entry, sem_ = elem
                    add_form.append(word_name)
                    beam_lexicon = beam_lexicon + entry
                    layer_sem.append(sem_)
                layer_form.append(add_form)
            CYK_form.append(layer_form)
            CYK_sem.append(layer_sem)
        return CYK_sem[-1]
    except Exception:
        # Any failure (malformed lexicon, parser error, ...) => "no parses".
        return []
from nltk.ccg import chart, lexicon

# Materials-science toy lexicon (statements and questions about alloys).
# Second argument False: entries carry categories only, no semantics.
lex = lexicon.fromstring('''
    :- S, N, NP
    Steel => N
    is => (S\\N)/NP
    an => NP/N
    alloy => N
    contains => (S\\N)/N
    carbon => N
    Does => NP/NP
    Ferrite => NP
    have => (S\\NP)/N
    high => N/N
    hardness => N
    Which => S/(S\\NP)/N
    material => N
    has => (S\\NP)/NP
    the => NP/N
    lowest => N/N
    tensile => N/N
    strength => N
    ''', 
    False)

parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)

# Print only the first derivation for each example sentence.
for parse in parser.parse("Steel is an alloy".split()):
    chart.printCCGDerivation(parse)
    break

# NOTE(review): the original snippet ended with a bare `for` header here
# (no body — a SyntaxError, likely lost in extraction); restored to mirror
# the loop above. Confirm against the original example.
for parse in parser.parse("Steel contains carbon".split()):
    chart.printCCGDerivation(parse)
    break
Beispiel #9
0
# CLEVR-style CCG lexicon: determiners, attribute adjectives
# (material/color/shape/size filters), relational "same/as/of", and
# question markers, each paired with a lambda-calculus semantic term.
# NOTE(review): `semantics` is not defined in this snippet — presumably a
# module-level boolean enabling semantic annotations; confirm its value.
lex2 = lexicon.fromstring(r"""
    :- NN, INP, ADJ, DET, IN

    DET :: NN/NN
    ADJ :: NN/NN
    IN :: (NN\NN)/NN
    ADV :: NN/ADJ
    QS :: NN/NN

    same => ADJ {\x.same_(x)}

    same => IN {\x y.same_(x,y)}


    material => NN {'material'}
    color => NN {'color'}
    shape => NN {'shape'}
    size => NN {'size'}

    as => IN {\x y.as(x,y)}
    of => IN {\x y.of(x,y)}

    to => DET {\x.relate(x)}

    the => DET {\P.(P)}
    a => DET {\P.(P)}
    an => DET {\P.(P)}
    any => DET {\P.(P)}


    metallic => ADJ {\x.filter_material(x,'metal')}
    metal => ADJ {\x.filter_material(x,'metal')}
    shiny => ADJ {\x.filter_material(x,'metal')}
    rubber => ADJ {\x.filter_material(x,'rubber')}
    matte => ADJ {\x.filter_material(x,'rubber')}

    gray => ADJ {\x.filter_color(x,'gray')}
    red => ADJ {\x.filter_color(x,'red')}
    blue => ADJ {\x.filter_color(x,'blue')}
    green => ADJ {\x.filter_color(x,'green')}
    brown => ADJ {\x.filter_color(x,'brown')}
    purple => ADJ {\x.filter_color(x,'purple')}
    cyan => ADJ {\x.filter_color(x,'cyan')}
    yellow => ADJ {\x.filter_color(x,'yellow')}

    big => ADJ {\x.filter_size(x,'large')}
    large => ADJ {\x.filter_size(x,'large')}
    small => ADJ {\x.filter_size(x,'small')}
    tiny => ADJ {\x.filter_size(x,'small')}

    left => ADJ {\x.left(x,'prueba')}
    left => NN {'left'}

    cube => NN {'cube'}
    block => NN {'cube'}
    sphere => ADJ {\x.filter_shape(x,'sphere')}
    spheres => NN {'sphere'}
    ball => NN {'sphere'}
    cylinder => NN {'cylinder'}

    what_is => QS {\x.query_(x)}
    
    are_there => QS {\x.query_(x)}
    is_there => QS {\x.query_(x)}


    
    object => NN {scene}
    thing => NN {scene}
    it => NN {scene}

    """,
                          include_semantics=semantics)
Beispiel #10
0
# NOTE(review): the original snippet began with a stray, unterminated `"""`
# that was closed mid-line by the following r""" literal, breaking the
# syntax (likely an extraction artifact); the stray marker is removed and
# the example preamble kept as plain code.
from nltk.ccg import chart, lexicon

# Target question and the logical form this example aims to produce.
question = "Are there any other things that are the same shape as the big metallic object?"
parse = "exist(same_shape(unique(filter_material(filter_size(scene, 'large'), 'metal'))))"

lex = lexicon.fromstring(r"""
    :- NN, INP, ADJ, DET, IN

    DET :: NN/NN
    ADJ :: NN/NN
    IN :: (NN\NN[comp])/NN

    same => ADJ {\x.same(x)}
    shape => NN {'shape'}
    as => IN {\x y.pair(x,y)}

    the => DET {\P.unique(P)}

    big => ADJ {\x.filter_size(x,'large')}
    metallic => ADJ {\x.filter_material(x,'metal')}

    object => NN {scene}""",
                         include_semantics=True)

parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
results = list(
    parser.parse("the same shape as the big metallic object".split()))
# Show at most the first three derivations.
for parse in results[:3]:
    chart.printCCGDerivation(parse)
Beispiel #11
0
from nltk.ccg import chart, lexicon

import numpy as np

# Flag passed through to include_semantics below.
semantics = True

# Deliberately ambiguous lexicon: "blue" and "red" each map to BOTH colors.
# NOTE(review): this looks like a word-learning setup where the correct
# color mapping is to be disambiguated from the `data` pairs below —
# confirm; if plain parsing were intended, the crossed entries would be bugs.
lex = lexicon.fromstring(r"""
    :- NN, DET, ADJ

    DET :: NN/NN
    ADJ :: NN/NN

    the => DET {\x.unique(x)}

    blue => ADJ {\x.color(x,red)}
    blue => ADJ {\x.color(x,blue)}

    red => ADJ {\x.color(x,red)}
    red => ADJ {\x.color(x,blue)}

    ball => NN {ball}
    """,
                         include_semantics=semantics)

# (sentence tokens, expected logical form) supervision pairs.
data = [
    ("the blue ball".split(), "unique(color(ball,blue))"),
    ("the red ball".split(), "unique(color(ball,red))"),
]