import json
from collections import Counter

from nltk.ccg import chart, lexicon
from tqdm import tqdm


def operator_precedence_features():
    features = []
    pass_num = 0
    with open('exp_data_turk_new.json', "r") as f:
        exps = json.load(f)
    for exp_num in tqdm(range(len(exps))):
        exp = exps[exp_num]
        # Progress check: report the feature count at 1/4, 1/2 and 3/4.
        if exp_num == int(len(exps) / 4) or exp_num == int(
                len(exps) / 2) or exp_num == int(len(exps) / 4 * 3):
            print(len(features))
        try:
            e = exp['exp']
            new_sent = pre_process_sent(e)
            sent_tokenized = print_tokenized(new_sent)
            quote_words = new_predicate(sent_tokenized)
            raw_lexicon = add_new_predicate(quote_words)
            lex = lexicon.fromstring(raw_lexicon, True)
            parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
            for sent in sent_tokenized:
                for i, parse in enumerate(list(parser.parse(sent))):
                    sem = parse_sem(str(parse.label()[0].semantics()))
                    if sem != False:
                        collect_features(sem, features)
        except:
            # Sentences whose lexicon fails to build or to parse are skipped.
            pass_num += 1
    counter = Counter(features)
    # Features ranked by frequency, most common first.
    prt = list(list(zip(*list(counter.most_common())))[0])
    print(prt)
    print(len(prt))
    print(pass_num, len(exps))
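# A quick illustration of the ranking idiom above, on toy data (not from the
# dataset): Counter.most_common() yields (feature, count) pairs, and the
# zip(*...) transpose keeps just the features, ordered by frequency.
demo_counter = Counter(["and", "or", "and", "not"])
assert list(zip(*demo_counter.most_common()))[0] == ('and', 'or', 'not')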
import re

import speech_recognition as sr
from nltk.ccg import chart, lexicon

# array_msg is this package's ROS message type (its import is omitted here).


def speech_reco_core():
    with open('/home/crazykoe/turtlebotws/lexicon.txt', 'r') as file:
        myLexicon = file.read()
    lex = lexicon.fromstring(myLexicon, True)
    parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)

    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("What do you need?")
        audio = r.listen(source)
    try:
        # recognize_google performs a network request; call it once and
        # reuse the transcript.
        request = r.recognize_google(audio)
        print("I think you said " + request + ". Got it!")
    except sr.UnknownValueError:
        print("Please say it again.")
        return None
    except sr.RequestError as e:
        print("The service is down: {}".format(e))
        return None

    robotmove = None
    parses = list(parser.parse(request.lower().split()))
    if len(parses) != 0:
        (token, op) = parses[0][()].label()
        if token.semantics() is not None:
            output = str(token.semantics())
            # Each clause of the semantics looks like
            # action(<verb>) & target(<noun>(<name>)), with <name> optional.
            match = re.findall(
                r"(?:action\((\w+)\) & target\((\w+)(?:\((\w+)\))?\)(?: &)?)+",
                output)
            robotmove = array_msg()
            robotmove.action = match[0][0]
            robotmove.target = match[0][1]
            robotmove.name = match[0][2]
            if len(match) == 1:
                robotmove.cmdaction = ''
                robotmove.targetroom = ''
                robotmove.names = ''
            else:
                robotmove.cmdaction = match[1][0]
                robotmove.targetroom = match[1][1]
                robotmove.names = match[1][2]
        else:
            print('Unable to parse')
    return robotmove
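# A quick illustration of the extraction regex (the semantics string below is
# hypothetical; real ones come from the CCG parse above). It yields one
# (action, target, name) triple per clause, with name empty when the target
# has no nested argument.
demo_output = "action(go) & target(room(kitchen)) & action(find) & target(person(bob))"
demo_match = re.findall(
    r"(?:action\((\w+)\) & target\((\w+)(?:\((\w+)\))?\)(?: &)?)+", demo_output)
# demo_match == [('go', 'room', 'kitchen'), ('find', 'person', 'bob')]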
def rule_features():
    lis = []
    pass_num = 0
    with open('exp_data_new.json', "r") as f:
        exps = json.load(f)
    for exp_num in tqdm(range(len(exps))):
        exp = exps[exp_num]
        try:
            e = exp['exp']
            new_sent = pre_process_sent(e)
            sent_tokenized = print_tokenized(new_sent)
            raw_lexicon = add_new_predicate(sent_tokenized)
            lex = lexicon.fromstring(raw_lexicon, True)
            parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
            for parse in parser.parse(sent_tokenized[0].split()):
                # Record the combinator rules used in this derivation.
                recurse_print(parse, lis)
        except:
            pass_num += 1
    with open('combrules.json', "w") as f:
        json.dump(lis, f)
def learn(lexicon, data):
    # `chart` here must be a CCG chart module extended with weighted parsing;
    # stock NLTK's CCGChartParser.parse has no return_weights keyword.
    parser = chart.CCGChartParser(lexicon, chart.DefaultRuleSet)
    learning_rate = 0.1

    for x, y in data:
        weighted_results = parser.parse(x, return_weights=True)

        # Very dumb perceptron learning: reward the lexical entries used in
        # derivations whose semantics match the gold form y, penalize the rest.
        for result, score in weighted_results:
            print("\n================= %s / %s / %f" %
                  (" ".join(x), result.label()[0].semantics(), score))
            chart.printCCGDerivation(result)

            root_token, _ = result.label()
            correct = str(root_token.semantics()) == y
            sign = 1 if correct else -1
            for _, leaf_token in result.pos():
                leaf_token._weight += sign * 1

        print()
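# Hypothetical usage sketch (the (tokens, logical-form) pair format is an
# assumption inferred from the function body, and `lex` stands for a weighted
# CCG lexicon built elsewhere):
# data = [("go to the kitchen".split(), "go(kitchen)")]
# learn(lex, data)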
everybody => NP {\P.forall x.(person(x) -> P(x))}
admires => (S\\NP)/NP {\Y.(\Z.Z(\z.Y(\y.admire(z,y))))}
complains => S\\NP {complain}
''', True)
print(l3)
print()
print('''====================================================================================
=== Derivation for 'somebody admires everybody' obtained with ApplicationRuleSet ===
=== The semantics is the expected one.                                           ===
====================================================================================''')
parser1 = chart.CCGChartParser(l3, chart.ApplicationRuleSet)
parses = list(parser1.parse("somebody admires everybody".split()))
printCCGDerivation(parses[0])
print('''=======================================================================================
=== Derivation for 'somebody admires everybody' obtained with                        ===
=== ForwardTypeRaiseRule + ForwardApplication.                                       ===
=== The result has scrambled scopes when run in the development branch.              ===
=======================================================================================''')
RightwardRuleSet = [
    chart.BinaryCombinatorRule(chart.ForwardApplication),
    chart.ForwardTypeRaiseRule()
]
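# Presumably the demo then re-parses the same sentence with the restricted
# rule set; a minimal sketch of that step:
parser2 = chart.CCGChartParser(l3, RightwardRuleSet)
parses = list(parser2.parse("somebody admires everybody".split()))
printCCGDerivation(parses[0])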
ministro => N
anuncio => (S\\NP)/NP
presidente => N
desmintio => (S\\NP)/NP
la => Det
nueva => N/N
ley => N
compro => (S\\NP)/NP
las => Det
bebidas => N
panaderia => N
super => N
pero => var\\.,var/.,var
''')
parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)

print("\n################ Welcome ################\n")
entry = ""
while entry != "fin":
    entry = input("\nEnter a sentence to analyze, or 'fin' to quit:\n> ")
    if entry != "fin":
        cont = 0
        for parse in parser.parse(entry.split()):
            cont = cont + 1
            print("\n")
            chart.printCCGDerivation(parse)
            break
import copy

from nltk.ccg import chart, lexicon


def parse_tokens(one_sent_tokenize, raw_lexicon):
    """
    CYK algorithm for parsing a tokenized sentence into a parse tree.

    We implement our own, as solely using NLTK's CCGChartParser and the
    grammar we came up with would not allow the parses we wanted. As we are
    not linguists, we found it easier to change the code than to track down
    the problems with our grammar.

    Outputs the last row of the CYK data structure as the possible parses
    for the sentence.
    * Each element in the row is a string version of nltk.tree.Tree (sort
      of; we actually construct our own tree based on the tree provided by
      NLTK).

    Arguments:
        one_sent_tokenize (arr): array of string tokens representing a sentence
        raw_lexicon (str): string representation of the lexicon (grammar and
            vocabulary of a language)

    Returns:
        (arr): list of possible parses; see the comment above
    """
    try:
        beam_lexicon = copy.deepcopy(raw_lexicon)
        # Row 0 of the CYK table: each cell holds the token itself.
        CYK_form = [[[token] for token in one_sent_tokenize]]
        CYK_sem = [[]]
        for layer in range(1, len(one_sent_tokenize)):
            layer_form = []
            layer_sem = []
            # Rebuild the lexicon each layer, since new entries are added below.
            lex = lexicon.fromstring(beam_lexicon, True)
            parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
            for col in range(0, len(one_sent_tokenize) - layer):
                form = []
                sem_temp = []
                word_index = 0
                st = col + 0
                ed = st + layer
                # Try every split point of the span [st, ed].
                for splt in range(st, ed):
                    words_L = CYK_form[splt - st][st]
                    words_R = CYK_form[ed - splt - 1][splt + 1]
                    for word_0 in words_L:
                        for word_1 in words_R:
                            try:
                                for parse in parser.parse([word_0, word_1]):
                                    (token, op) = parse.label()
                                    categ = token.categ()
                                    sem = token.semantics()
                                    word_name = '$Layer{}_Horizon{}_{}'.format(
                                        str(layer), str(col), str(word_index))
                                    word_index += 1
                                    entry = ("\n\t\t" + word_name + ' => ' +
                                             str(categ) + " {" + str(sem) + "}")
                                    # Deduplicate on (semantics, category).
                                    if str(sem) + '_' + str(categ) not in sem_temp:
                                        form.append((parse, word_name, entry, str(sem)))
                                        sem_temp.append(str(sem) + '_' + str(categ))
                            except:
                                pass
                add_form = []
                for elem in form:
                    parse, word_name, entry, sem_ = elem
                    # Register the derived constituent as a new lexical entry
                    # so later layers can combine it further.
                    add_form.append(word_name)
                    beam_lexicon = beam_lexicon + entry
                    layer_sem.append(sem_)
                layer_form.append(add_form)
            CYK_form.append(layer_form)
            CYK_sem.append(layer_sem)
        return CYK_sem[-1]
    except:
        return []
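# Hypothetical usage sketch: parse_tokens takes raw tokens plus an NLTK CCG
# lexicon string (this toy grammar is an assumption, not the project's own):
toy_lexicon = r'''
    :- S, NP
    go => S/NP {\x.go(x)}
    home => NP {home}
'''
print(parse_tokens(["go", "home"], toy_lexicon))  # expected: ['go(home)']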
object => NN {scene}
thing => NN {scene}
it => NN {scene}
""", include_semantics=semantics)

# TODO:
#   Left, right, etc.
#   that
#   Hard one:
#     Is the purple thing the same shape as the large gray rubber thing?
#     equal_shape(query_shape(unique(filter_color(scene,u'purple'))),query_shape(unique(filter_material(filter_color(filter_size(scene,u'large'),u'gray'),u'rubber'))))

parser = chart.CCGChartParser(lex2, chart.DefaultRuleSet)
# results = list(parser.parse("the same shape as the big metallic object".split()))
# results = list(parser.parse("a big brown object of the same shape as the green thing".split()))
results = list(parser.parse("the material of the big purple object".split()))
# results = list(parser.parse("any sphere to the left of it".split()))
# results = list(parser.parse("the purple thing the same shape as the large gray rubber thing".split()))
chart.printCCGDerivation(results[0])

# Candidate lexicon entries, not yet enabled:
# are there any other things that are => S {\x.exist(x)}
# right => ADJ {\x.right(x,'right')}
# right => NN {'right'}
# front => ADJ {\x.front(x)}
# front => NN {'front'}
# behind => ADV {\x.filter_size(x,'behind')}