def convert_nlp_test(pred):
    # convert a legacy nlp_test predicate into the new clause format,
    # numbering tests sequentially via the module-global test_cnt
    global test_cnt

    lang    = pred.args[0].name
    ivr_in  = pred.args[1].args[0].args[0]
    ivr_out = pred.args[1].args[1].args[0]

    head = Predicate(name='nlp_test',
                     args=[StringLiteral(MODULE_NAME),
                           Predicate(name=lang),
                           StringLiteral('t%04d' % test_cnt),
                           Predicate(name='FIXME'),
                           ListLiteral([ivr_in, ivr_out, ListLiteral([])])])
    test_cnt += 1

    clause = Clause(head=head)

    print unicode(clause)
def push_context(self, key, value):
    # push value onto the context stack stored under key, keeping at most
    # MAX_CONTEXT_LEN entries (newest first)

    l = self.read_context(key)
    if not l:
        l = ListLiteral([])

    l.l.insert(0, value)
    l.l = l.l[:MAX_CONTEXT_LEN]

    self.write_context(key, l)
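# A hypothetical illustration of the bounded-stack semantics above (the key
# name 'topic' and MAX_CONTEXT_LEN = 3 are assumptions for this sketch):
#
#   self.push_context('topic', StringLiteral(u'weather'))
#   self.push_context('topic', StringLiteral(u'time'))
#
#   # self.read_context('topic') now yields a ListLiteral whose .l is
#   # [StringLiteral(u'time'), StringLiteral(u'weather')] -- newest first,
#   # truncated to at most MAX_CONTEXT_LEN entries on every push.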
def _setup_context (self, user, lang, inp, prev_context, prev_res):
    # create a fresh gensym'd context constant and assert the basic facts
    # about the current input (user, lang, tokens, time), chaining to
    # prev_context if one exists

    cur_context = Predicate(do_gensym (self.rt, 'context'))

    res = {}
    if ASSERT_OVERLAY_VAR_NAME in prev_res:
        res[ASSERT_OVERLAY_VAR_NAME] = prev_res[ASSERT_OVERLAY_VAR_NAME].clone()

    res = do_assertz ({}, Clause(Predicate('user', [cur_context, Predicate(user)]),
                                 location=self.dummyloc), res=res)
    res = do_assertz ({}, Clause(Predicate('lang', [cur_context, Predicate(lang)]),
                                 location=self.dummyloc), res=res)

    token_literal = ListLiteral (list(map(lambda x: StringLiteral(x), inp)))
    res = do_assertz ({}, Clause(Predicate('tokens', [cur_context, token_literal]),
                                 location=self.dummyloc), res=res)

    currentTime = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC).isoformat()
    res = do_assertz ({}, Clause(Predicate('time', [cur_context, StringLiteral(currentTime)]),
                                 location=self.dummyloc), res=res)

    if prev_context:
        res = do_assertz ({}, Clause(Predicate('prev', [cur_context, prev_context]),
                                     location=self.dummyloc), res=res)

        # copy over all previous context statements to the new one
        s1s = self.rt.search_predicate ('context', [prev_context, '_1', '_2'], env=res)
        for s1 in s1s:
            res = do_assertz ({}, Clause(Predicate('context', [cur_context, s1['_1'], s1['_2']]),
                                         location=self.dummyloc), res=res)

        # copy over all previous mem statements to the new one
        s1s = self.rt.search_predicate ('mem', [prev_context, '_1', '_2'], env=res)
        for s1 in s1s:
            res = do_assertz ({}, Clause(Predicate('mem', [cur_context, s1['_1'], s1['_2']]),
                                         location=self.dummyloc), res=res)

    res['C'] = cur_context

    return res, cur_context
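# For orientation, a sketch of the facts asserted into the overlay for a
# gensym'd context constant context42 (all names here are illustrative only):
#
#   user   (context42, dave)
#   lang   (context42, en)
#   tokens (context42, ["hello", "computer"])
#   time   (context42, "2017-01-01T00:00:00+00:00")
#   prev   (context42, context41)        % plus copies of all context/3
#                                        % and mem/3 facts of context41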
def builtin_tokenize(g, pe):

    """ tokenize (+Lang, +Str, -Tokens) """

    pe._trace('CALLED BUILTIN tokenize', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) != 3:
        raise PrologRuntimeError('tokenize: 3 args expected.', g.location)

    arg_lang = pe.prolog_eval(args[0], g.env, g.location)
    if not isinstance(arg_lang, Predicate) or len(arg_lang.args) > 0:
        raise PrologRuntimeError(
            'tokenize: first argument: constant expected, %s found instead.' % repr(args[0]),
            g.location)

    arg_str    = pe.prolog_get_string(args[1], g.env, g.location)
    arg_tokens = pe.prolog_get_variable(args[2], g.env, g.location)

    tokens = list(map(lambda s: StringLiteral(s), tokenize(arg_str, lang=arg_lang.name)))
    g.env[arg_tokens] = ListLiteral(tokens)

    return True
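# Hypothetical registration and call, assuming the runtime exposes a
# register_builtin(name, fn) hook (an assumption, not shown in this section):
#
#   rt.register_builtin('tokenize', builtin_tokenize)
#
# after which a goal like
#
#   tokenize(en, "Hello, World!", TOKENS)
#
# binds TOKENS to a ListLiteral of StringLiterals, e.g. ["hello", "world"].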
def convert_macro_string(ms):
    # small state machine: plain text becomes StringLiterals, '@' starts a
    # macro reference (name consumed up to the next blank), and
    # '(' ... '|' ... ')' collects a ListLiteral of alternatives

    pos   = 0
    state = STATE_NORMAL
    curs  = u''
    res   = []

    while pos < len(ms):
        c = ms[pos]

        if state == STATE_NORMAL:
            if c == '@':
                if len(curs.strip()) > 0:
                    res.append(StringLiteral(curs.strip()))
                curs = u''
                state = STATE_MACRO
            elif c == '(':
                if len(curs.strip()) > 0:
                    res.append(StringLiteral(curs.strip()))
                curs = u''
                choices = []
                state = STATE_CHOICE
            else:
                curs += c

        elif state == STATE_MACRO:
            # macro name is consumed and discarded up to the next blank
            if c == ' ':
                state = STATE_NORMAL

        elif state == STATE_CHOICE:
            if c == ')':
                state = STATE_NORMAL
                choices.append(StringLiteral(curs))
                curs = u''
                res.append(ListLiteral(choices))
            elif c == '|':
                choices.append(StringLiteral(curs))
                curs = u''
            else:
                curs += c

        pos += 1

    if state == STATE_NORMAL:
        if len(curs.strip()) > 0:
            res.append(StringLiteral(curs.strip()))

    return ListLiteral(res)
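# A minimal sketch of the expected behaviour, assuming STATE_NORMAL,
# STATE_MACRO and STATE_CHOICE are distinct module-level constants:
#
#   convert_macro_string(u'(hello|hi) world')
#   # -> ListLiteral([ListLiteral([StringLiteral(u'hello'),
#   #                              StringLiteral(u'hi')]),
#   #                 StringLiteral(u'world')])
#
# the parenthesized alternatives become a nested list while the remaining
# text is emitted as stripped StringLiterals.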
def _prolog_from_json(o):

    if o['pt'] == 'Constant':
        return Predicate(o['name'])
    if o['pt'] == 'StringLiteral':
        return StringLiteral(o['s'])
    if o['pt'] == 'NumberLiteral':
        return NumberLiteral(o['f'])
    if o['pt'] == 'ListLiteral':
        return ListLiteral(o['l'])

    raise PrologRuntimeError('cannot convert from json: %s .' % repr(o))
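# Usage sketch mirroring the DT_LIST branch of rdf_to_pl below; the 'pt'
# discriminator and the per-type fields ('s', 'f', 'l', 'name') are exactly
# the ones handled above (the sample payload itself is made up):

import json

serialized = u'{"pt": "ListLiteral", "l": [' \
             u'{"pt": "StringLiteral", "s": "hello"}, ' \
             u'{"pt": "NumberLiteral", "f": 42.0}]}'

# object_hook converts innermost JSON objects first, so nested literals are
# already prolog terms by the time the outer ListLiteral is built
term = json.JSONDecoder(object_hook=_prolog_from_json).decode(serialized)
# -> ListLiteral([StringLiteral(u'hello'), NumberLiteral(42.0)])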
def builtin_context_get_fn(pred, env, rt):

    """ context_get(+Name) """

    rt._trace_fn('CALLED FUNCTION context_get', env)

    args = pred.args
    if len(args) != 1:
        raise PrologRuntimeError('context_get: 1 arg expected.')

    key = args[0].name

    v = rt.read_context(key)
    if not v:
        return ListLiteral([])

    return v
def process_input (self, utterance, utt_lang, user_uri, test_mode=False, trace=False):

    """ process user input, return action(s) """

    gn = rdflib.Graph(identifier=CONTEXT_GRAPH_NAME)

    tokens = tokenize(utterance, utt_lang)

    self.kb.remove((CURIN, None, None, gn))

    quads = [(CURIN, KB_PREFIX + u'user',      user_uri,  gn),
             (CURIN, KB_PREFIX + u'utterance', utterance, gn),
             (CURIN, KB_PREFIX + u'uttLang',   utt_lang,  gn),
             (CURIN, KB_PREFIX + u'tokens',
              pl_literal_to_rdf(ListLiteral(tokens), self.kb), gn)]

    if test_mode:
        quads.append((CURIN, KB_PREFIX + u'currentTime',
                      pl_literal_to_rdf(NumberLiteral(TEST_TIME), self.kb), gn))
    else:
        quads.append((CURIN, KB_PREFIX + u'currentTime',
                      pl_literal_to_rdf(NumberLiteral(time.time()), self.kb), gn))

    self.kb.addN_resolve(quads)

    self.prolog_rt.reset_actions()

    if test_mode:
        for dr in self.db.session.query(model.DiscourseRound).filter(
                model.DiscourseRound.inp == utterance,
                model.DiscourseRound.lang == utt_lang):

            prolog_s = ','.join(dr.resp.split(';'))

            logging.info("test tokens=%s prolog_s=%s" % (repr(tokens), prolog_s))

            c = self.parser.parse_line_clause_body(prolog_s)
            solutions = self.prolog_rt.search(c)

    return self.prolog_rt.get_actions()
def rdf_to_pl(l):

    value = unicode(l)

    if isinstance(l, rdflib.Literal):
        if l.datatype:
            datatype = str(l.datatype)

            # NumberLiteral stores floats, so xsd:integer values are widened
            # to float along with xsd:decimal and xsd:float
            if datatype == 'http://www.w3.org/2001/XMLSchema#decimal':
                value = NumberLiteral(float(value))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#float':
                value = NumberLiteral(float(value))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#integer':
                value = NumberLiteral(float(value))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#dateTime':
                dt = dateutil.parser.parse(value)
                value = NumberLiteral(time.mktime(dt.timetuple()))
            elif datatype == 'http://www.w3.org/2001/XMLSchema#date':
                dt = dateutil.parser.parse(value)
                value = NumberLiteral(time.mktime(dt.timetuple()))
            elif datatype == DT_LIST:
                value = json.JSONDecoder(object_hook=_prolog_from_json).decode(value)
            elif datatype == DT_CONSTANT:
                value = Predicate(value)
            else:
                raise PrologRuntimeError('sparql_query: unknown datatype %s .' % datatype)
        else:
            if l.value is None:
                value = ListLiteral([])
            else:
                value = StringLiteral(value)
    else:
        value = StringLiteral(value)

    return value
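# A minimal usage sketch, assuming typed rdflib terms:
#
#   import rdflib
#   from rdflib.namespace import XSD
#
#   rdf_to_pl(rdflib.Literal('3.14', datatype=XSD.decimal))
#   # -> NumberLiteral(3.14)
#
#   rdf_to_pl(rdflib.URIRef(u'http://dbpedia.org/resource/Helsinki'))
#   # -> StringLiteral(u'http://dbpedia.org/resource/Helsinki')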
def process_input(self, utterance, utt_lang, user_uri, test_mode=False, trace=False):

    """ process user input, return action(s) """

    gn = rdflib.Graph(identifier=CONTEXT_GRAPH_NAME)

    tokens = tokenize(utterance, utt_lang)

    self.kb.remove((CURIN, None, None, gn))

    quads = [(CURIN, KB_PREFIX + u'user',      user_uri,  gn),
             (CURIN, KB_PREFIX + u'utterance', utterance, gn),
             (CURIN, KB_PREFIX + u'uttLang',   utt_lang,  gn),
             (CURIN, KB_PREFIX + u'tokens',
              pl_literal_to_rdf(ListLiteral(tokens), self.kb), gn)]

    if test_mode:
        quads.append((CURIN, KB_PREFIX + u'currentTime',
                      pl_literal_to_rdf(NumberLiteral(TEST_TIME), self.kb), gn))
    else:
        quads.append((CURIN, KB_PREFIX + u'currentTime',
                      pl_literal_to_rdf(NumberLiteral(time.time()), self.kb), gn))

    self.kb.addN_resolve(quads)

    self.prolog_rt.reset_actions()
    self.prolog_rt.set_trace(trace)

    if test_mode:

        # in test mode, look the expected response up in the database
        # instead of running the seq2seq model

        prolog_s = None
        for dr in self.db.session.query(model.DiscourseRound).filter(
                model.DiscourseRound.inp == utterance,
                model.DiscourseRound.lang == utt_lang):
            prolog_s = ','.join(dr.resp.split(';'))

        logging.info("test tokens=%s prolog_s=%s" % (repr(tokens), prolog_s))

        if not prolog_s:
            logging.error('test utterance %s not found!' % utterance)
            return []

    else:

        x = self.nlp_model.compute_x(utterance)
        logging.debug("x: %s -> %s" % (utterance, x))

        # which bucket does it belong to?
        bucket_id = min([b for b in xrange(len(self.nlp_model.buckets))
                         if self.nlp_model.buckets[b][0] > len(x)])

        # get a 1-element batch to feed the sentence to the model
        encoder_inputs, decoder_inputs, target_weights = self.tf_model.get_batch(
            {bucket_id: [(x, [])]}, bucket_id)

        # get output logits for the sentence
        _, _, output_logits = self.tf_model.step(self.tf_session, encoder_inputs,
                                                 decoder_inputs, target_weights,
                                                 bucket_id, True)

        logging.debug("output_logits: %s" % repr(output_logits))

        # this is a greedy decoder - outputs are just argmaxes of output_logits
        outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]

        preds = map(lambda o: self.inv_output_dict[o], outputs)
        logging.debug("preds: %s" % repr(preds))

        # join the predicted tokens into a prolog clause body, honoring
        # 'or' markers and skipping special tokens such as _EOS
        prolog_s = ''
        do_and   = True

        for p in preds:

            if p[0] == '_':
                continue  # skip _EOS

            if p == u'or':
                do_and = False
                continue

            if len(prolog_s) > 0:
                if do_and:
                    prolog_s += ', '
                else:
                    prolog_s += '; '

            prolog_s += p
            do_and = True

    logging.debug('?- %s' % prolog_s)

    c = self.parser.parse_line_clause_body(prolog_s)
    solutions = self.prolog_rt.search(c)

    abufs = self.prolog_rt.get_actions()

    return abufs
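# Note on the bucket selection above: seq2seq buckets are (encoder_len,
# decoder_len) pairs, and the smallest bucket whose encoder size exceeds
# len(x) is chosen. With buckets = [(5, 10), (10, 15), (20, 25)] (an assumed
# configuration) and an input sequence of length 7:
#
#   bucket_id = min([b for b in xrange(3) if buckets[b][0] > 7])   # -> 1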
def builtin_sparql_query(g, pe):

    pe._trace('CALLED BUILTIN sparql_query', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) < 1:
        raise PrologRuntimeError('sparql_query: at least 1 argument expected.')

    query = pe.prolog_get_string(args[0], g.env)

    # run query
    result = pe.kb.query(query)

    if len(result) == 0:
        return False

    # turn result into lists of literals we can then bind to prolog variables

    res_map  = {}
    res_vars = {}  # variable idx -> variable name

    for binding in result:
        for v in binding.labels:
            l = binding[v]
            value = rdf_to_pl(l)

            if not v in res_map:
                res_map[v] = []
                res_vars[binding.labels[v]] = v

            res_map[v].append(value)

    # apply bindings to environment vars

    v_idx = 0
    for arg in args[1:]:
        sparql_var = res_vars[v_idx]
        prolog_var = pe.prolog_get_variable(arg, g.env)
        value      = res_map[sparql_var]

        g.env[prolog_var] = ListLiteral(value)
        v_idx += 1

    return True
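# Hedged usage sketch: each argument after the query string is bound, in
# result-column order, to the list of values of one SPARQL variable
# (variable names here are illustrative):
#
#   sparql_query("SELECT DISTINCT ?leader ?label WHERE { ... }",
#                LEADERS, LABELS)
#
# binds LEADERS and LABELS to parallel ListLiterals, one element per result
# row, each converted via rdf_to_pl.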
def _rdf_exec(g, pe, generate_lists=False):

    # This function assembles an rdflib SPARQL algebra tree out of CompValue
    # nodes (rdflib.plugins.sparql.parserutils.CompValue, an OrderedDict
    # subclass). For reference, the algebra of a typical SELECT query with
    # one optional triple has this shape:
    #
    # SelectQuery(
    #   p = Project(
    #     p = LeftJoin(
    #       p1 = BGP(
    #         triples = [(?leader, rdf:type, schema:Person),
    #                    (?leader, rdfs:label, ?label)]),
    #       p2 = BGP(
    #         triples = [(?leaderobj, dbo:leader, ?leader)]),
    #       expr = TrueFilter()),
    #     PV = [?leader, ?label, ?leaderobj]),
    #   datasetClause = None,
    #   PV = [?leader, ?label, ?leaderobj])

    pred = g.terms[g.inx]
    args = pred.args

    distinct         = False
    triples          = []
    optional_triples = []
    filters          = []
    limit            = 0
    offset           = 0

    arg_idx = 0
    var_map = {}  # string -> rdflib.term.Variable

    while arg_idx < len(args):

        arg_s = args[arg_idx]

        # check for optional structure
        if isinstance(arg_s, Predicate) and arg_s.name == 'optional':

            s_args = arg_s.args
            if len(s_args) != 3:
                raise PrologRuntimeError('rdf: optional: triple arg expected')

            arg_s = s_args[0]
            arg_p = s_args[1]
            arg_o = s_args[2]

            logging.debug('rdf: optional arg triple: %s' % repr((arg_s, arg_p, arg_o)))

            optional_triples.append((pl_to_rdf(arg_s, g.env, pe, var_map, pe.kb),
                                     pl_to_rdf(arg_p, g.env, pe, var_map, pe.kb),
                                     pl_to_rdf(arg_o, g.env, pe, var_map, pe.kb)))

            arg_idx += 1

        # check for filter structure
        elif isinstance(arg_s, Predicate) and arg_s.name == 'filter':

            logging.debug('rdf: filter structure detected: %s' % repr(arg_s.args))

            s_args = arg_s.args

            # transform multiple arguments into an explicit and-tree
            pl_expr = s_args[0]
            for a in s_args[1:]:
                pl_expr = Predicate('and', [pl_expr, a])

            filters.append(prolog_to_filter_expression(pl_expr, g.env, pe, var_map, pe.kb))

            arg_idx += 1

        # check for distinct
        elif isinstance(arg_s, Predicate) and arg_s.name == 'distinct':

            s_args = arg_s.args
            if len(s_args) != 0:
                raise PrologRuntimeError('rdf: distinct: unexpected arguments.')

            distinct = True
            arg_idx += 1

        # check for limit/offset
        elif isinstance(arg_s, Predicate) and arg_s.name == 'limit':

            s_args = arg_s.args
            if len(s_args) != 1:
                raise PrologRuntimeError('rdf: limit: one argument expected.')

            limit = pe.prolog_get_int(s_args[0], g.env)
            arg_idx += 1

        elif isinstance(arg_s, Predicate) and arg_s.name == 'offset':

            s_args = arg_s.args
            if len(s_args) != 1:
                raise PrologRuntimeError('rdf: offset: one argument expected.')

            offset = pe.prolog_get_int(s_args[0], g.env)
            arg_idx += 1

        else:

            # plain triple: consume three arguments
            if arg_idx > len(args) - 3:
                raise PrologRuntimeError('rdf: not enough arguments for triple')

            arg_p = args[arg_idx + 1]
            arg_o = args[arg_idx + 2]

            logging.debug('rdf: arg triple: %s' % repr((arg_s, arg_p, arg_o)))

            triples.append((pl_to_rdf(arg_s, g.env, pe, var_map, pe.kb),
                            pl_to_rdf(arg_p, g.env, pe, var_map, pe.kb),
                            pl_to_rdf(arg_o, g.env, pe, var_map, pe.kb)))

            arg_idx += 3

    logging.debug('rdf: triples: %s' % repr(triples))
    logging.debug('rdf: optional_triples: %s' % repr(optional_triples))
    logging.debug('rdf: filters: %s' % repr(filters))

    if len(triples) == 0:
        raise PrologRuntimeError('rdf: at least one non-optional triple expected')

    var_list = var_map.values()
    var_set  = set(var_list)

    p = CompValue('BGP', triples=triples, _vars=var_set)

    for t in optional_triples:
        p = CompValue('LeftJoin', p1=p,
                      p2=CompValue('BGP', triples=[t], _vars=var_set),
                      expr=CompValue('TrueFilter', _vars=set([])))

    for f in filters:
        p = CompValue('Filter', p=p, expr=f, _vars=var_set)

    if limit > 0:
        p = CompValue('Slice', start=offset, length=limit, p=p, _vars=var_set)

    if distinct:
        p = CompValue('Distinct', p=p, _vars=var_set)

    algebra = CompValue('SelectQuery', p=p, datasetClause=None, PV=var_list, _vars=var_set)

    result = pe.kb.query_algebra(algebra)

    logging.debug('rdf: result (len: %d): %s' % (len(result), repr(result)))

    if len(result) == 0:
        return False

    if generate_lists:

        # bind each variable to the list of its values across all rows
        for binding in result:
            for v in binding.labels:
                value = rdf_to_pl(binding[v])

                if not v in g.env:
                    g.env[v] = ListLiteral([])
                g.env[v].l.append(value)

        return True

    else:

        # turn result into a list of bindings, one per row
        res_bindings = []
        for binding in result:
            res_binding = {}
            for v in binding.labels:
                res_binding[v] = rdf_to_pl(binding[v])
            res_bindings.append(res_binding)

        if len(res_bindings) == 0 and len(result) > 0:
            res_bindings.append({})  # signal success

        logging.debug('rdf: res_bindings: %s' % repr(res_bindings))

        return res_bindings
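# Hedged sketch of the argument mini-language parsed above (predicate,
# namespace and variable names, and the filter expression syntax, are all
# illustrative): plain arguments are consumed three at a time as triples,
# and optional/3, filter/N, distinct/0, limit/1 and offset/1 structures may
# be interleaved:
#
#   rdf(distinct,
#       LEADER, rdf:type,   schema:Person,
#       LEADER, rdfs:label, LABEL,
#       optional(LEADEROBJ, dbo:leader, LEADER),
#       filter(lang(LABEL) = 'en'),
#       limit(10))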
def builtin_context_score(g, pe):

    """ context_score(+Name, ?Value, +Points, ?Score [, +MinPoints]) """

    pe._trace('CALLED BUILTIN context_score', g)

    pred = g.terms[g.inx]
    args = pred.args
    if len(args) < 4:
        raise PrologRuntimeError('context_score: at least 4 args expected.')
    if len(args) > 5:
        raise PrologRuntimeError('context_score: max 5 args expected.')

    key    = args[0].name
    value  = pe.prolog_eval(args[1], g.env)
    points = pe.prolog_get_float(args[2], g.env)
    scorev = pe.prolog_get_variable(args[3], g.env)

    if len(args) == 5:
        min_score = pe.prolog_get_float(args[4], g.env)
    else:
        min_score = 0.0

    score = g.env[scorev].f if scorev in g.env else 0.0

    if value:

        # value is bound: award points decayed by stack position

        stack = pe.read_context(key)
        if stack:
            i = 1
            for v in stack.l:
                if v == value:
                    score += points / float(i)
                    break
                i += 1

        if score < min_score:
            return False

        g.env[scorev] = NumberLiteral(score)
        return True

    if not isinstance(args[1], Variable):
        raise PrologRuntimeError(
            u'context_score: arg 2 literal or variable expected, %s found instead.'
            % unicode(args[1]))

    # value is unbound: generate one solution per context stack entry

    res = []

    stack = pe.read_context(key)
    if stack:
        i = 1
        for v in stack.l:
            s = score + points / float(i)
            if s >= min_score:
                res.append({args[1].name: v,
                            scorev: NumberLiteral(s)})
            i += 1
    else:
        if score >= min_score:
            res.append({args[1].name: ListLiteral([]),
                        scorev: NumberLiteral(score)})

    return res
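# Worked sketch of the decay formula: a match at 1-based stack position i is
# awarded points / i, so fresher context entries score higher. Assuming the
# context stack for key 'topic' is [b, a] (newest first) and no prior score:
#
#   context_score(topic, b, 100, SCORE)   % SCORE = 100 / 1 = 100.0
#   context_score(topic, a, 100, SCORE)   % SCORE = 100 / 2 =  50.0
#
# With an unbound second argument, one solution per stack entry is
# generated, each binding the value together with its decayed score.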