class Disease(Particle):
    """
    Particle for a disease mention.

    Matches either an optional JJ-tagged word followed by one word tagged
    NN, NNP or NNS, or a single VBN-tagged word.
    """

    regex = (
        Question(Pos("JJ")) + (Pos("NN") | Pos("NNP") | Pos("NNS"))
        | Pos("VBN")
    )

    def interpret(self, match):
        """Wrap the matched tokens in a ``HasDesease`` expression."""
        tokens = match.words.tokens
        return HasDesease(tokens)
class MilitaryConflict(Particle):
    """
    Particle for a military conflict mention.

    Matches an optional DT-tagged word followed by whatever the
    module-level ``nouns`` pattern matches.
    """

    regex = Question(Pos("DT")) + nouns

    def interpret(self, match):
        """Combine ``IsMilitaryConflict`` with a label built from the tokens."""
        conflict_label = match.words.tokens
        return IsMilitaryConflict() + LabelOfFixedDataRelation(conflict_label)
#!/usr/bin/python
from refo import Literal, Question, match
import re
import time

# Benchmark: refo versus the standard `re` module on a pathological pattern.
#
# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.

# You might want to try with N = 20, 30, 40, 100 to see what happens
N = 25

a = Literal("a")
string = "a" * N

# refo version of the pattern: N optional "a" followed by N mandatory "a".
regex = Question(a) * N + a * N
start = time.time()
m = match(regex, string)
end = time.time()
# Single-argument print(...) produces the same output on Python 2 and 3.
print("Refo finished in {0:.2} seconds".format(end - start))

# Same pattern for `re`: "(?:a?){N}a{N}".  The group is non-capturing
# ("(?:"), so only backtracking cost is measured.
regex = "(?:a?){{{0}}}a{{{0}}}".format(N)
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print("Python re finished in {0:.2} seconds".format(end - start))
class Thing(Particle):
    """
    Particle for a generic keyword phrase.

    Matches one or more repetitions of: an optional JJ-tagged word followed
    by a word tagged NN, NNP or NNS, or a single VBN-tagged word.
    """

    regex = Plus(
        Question(Pos("JJ")) + (Pos("NN") | Pos("NNP") | Pos("NNS"))
        | Pos("VBN")
    )

    def interpret(self, match):
        """Wrap the matched tokens in a ``HasKeyword`` expression."""
        tokens = match.words.tokens
        return HasKeyword(tokens)
class SpeakersOfQuestion(QuestionTemplate):
    """
    Template for questions asking how many people speak a language.

    Ex: "How many people speaks English language?"
        "How many people speak Canadian French?"
        "How many people in the world can speak Arabic language?"
    """

    regex = (
        # "how many people speak(s) <language>"
        Lemmas("how many") + Lemma("people")
        + (Lemma("speaks") | Lemma("speak"))
        + Language() + Question(Pos("."))
    ) | (
        # "how many people in the world can speak(s) <language>"
        Lemmas("how many") + Lemma("people")
        + Pos("IN") + Pos("DT") + Lemma("world") + Lemma("can")
        + (Lemma("speak") | Lemma("speaks"))
        + Language() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``SpeakersOf`` the matched language, typed "literal"."""
        speaker_count = SpeakersOf(match.language)
        return speaker_count, "literal"
class Movie(Particle):
    """
    Particle for a movie mention.

    Matches an optional DT-tagged word followed by whatever the
    module-level ``nouns`` pattern matches.
    """

    regex = Question(Pos("DT")) + nouns

    def semantics(self, match):
        """Combine ``IsMovie`` with ``HasName`` of the matched tokens."""
        title_tokens = match.words.tokens
        return IsMovie() + HasName(title_tokens)
from refo import Literal, Question, match
import re
import time

# Benchmark: refo versus the standard `re` module on a pathological pattern.
#
# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.

# You might want to try with N = 20, 30, 40, 100 to see what happens
N = 30

a = Literal("a")
string = "a" * N

# refo version of the pattern: N optional "a" followed by N mandatory "a".
regex = Question(a) * N + a * N
start = time.time()
m = match(regex, string)
end = time.time()
# Single-argument print(...) produces the same output on Python 2 and 3.
print("Refo finished in {0:.2} seconds".format(end - start))

# Same pattern for `re`: "(?:a?){N}a{N}".  The group is non-capturing
# ("(?:"), so only backtracking cost is measured.
regex = "(?:a?){{{0}}}a{{{0}}}".format(N)
print("Trying {0}".format(regex))
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print("Python re finished in {0:.2} seconds".format(end - start))
class WhereIsHomePageQuestion(QuestionTemplate):
    """
    Template for questions asking where a vocabulary's documentation is.

    Ex: "Where to find foaf documentation?"
    """

    regex = (
        Lemmas("where to") + Lemma("find")
        + Vocabulary()
        + Lemma("documentation") + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.IsHomePageOf`` the matched vocabulary, typed "url"."""
        homepage = dsl.IsHomePageOf(match.vocabulary)
        return homepage, "url"
class ProteinOfQuestion(QuestionTemplate):
    """
    Template for questions about the protein in a species.

    Ex: "How much protein in an apple?"
        "How much protein an Apple have?"
        "Do Apple have protein?"
    """

    regex = (
        # "how much protein <IN> <DT> <species>"
        Lemmas("how much") + Lemma("protein") + Pos("IN") + Pos("DT")
        + Group(Pos("NNP"), 'species') + Question(Pos("."))
    ) | (
        # "how much protein <DT> <species> have"
        Lemmas("how much") + Lemma("protein") + Pos("DT")
        + Group(Pos("NNP"), 'species') + Lemma("have") + Question(Pos("."))
    ) | (
        # "do <species> have protein"
        Lemma("do") + Group(Pos("NNP"), 'species')
        + Lemma("have") + Lemma("protein") + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``ProteinOf`` the matched species, typed "enum"."""
        species_tokens = match.species.tokens
        target = IsSpecies() + HasKeyword(species_tokens)
        return ProteinOf(target), "enum"
class SugarOfQuestion(QuestionTemplate):
    """
    Template for questions about the sugar in a species.

    Ex: "How much sugar in an Apple?"
        "How much sugar an Apple have?"
        "Do Apple have sugar?"
    """

    regex = (
        # "how much sugar <IN> <DT> <species>"
        Lemmas("how much") + Lemma("sugar") + Pos("IN") + Pos("DT")
        + Group(Pos("NNP"), 'species') + Question(Pos("."))
    ) | (
        # "how much sugar <DT> <species> have"
        Lemmas("how much") + Lemma("sugar") + Pos("DT")
        + Group(Pos("NNP"), 'species') + Lemma("have") + Question(Pos("."))
    ) | (
        # "do <species> have sugar"
        Lemma("do") + Group(Pos("NNP"), 'species')
        + Lemma("have") + Lemma("sugar") + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``SugarOf`` the matched species, typed "enum"."""
        species_tokens = match.species.tokens
        target = IsSpecies() + HasKeyword(species_tokens)
        return SugarOf(target), "enum"
class CarbsOfQuestion(QuestionTemplate):
    """
    Template for questions about the carbs in a species.

    Ex: "How much carbs in an apple?"
        "How much carbs an Apple have?"
        "Do Apple have carbs?"
    """

    regex = (
        # "how much carbs <IN> <DT> <species>"
        Lemmas("how much") + Lemma("carbs") + Pos("IN") + Pos("DT")
        + Group(Pos("NNP"), 'species') + Question(Pos("."))
    ) | (
        # "how much carbs <DT> <species> have"
        Lemmas("how much") + Lemma("carbs") + Pos("DT")
        + Group(Pos("NNP"), 'species') + Lemma("have") + Question(Pos("."))
    ) | (
        # "do <species> have carbs"
        Lemma("do") + Group(Pos("NNP"), 'species')
        + Lemma("have") + Lemma("carbs") + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``CarbsOf`` the matched species, typed "enum"."""
        species_tokens = match.species.tokens
        target = IsSpecies() + HasKeyword(species_tokens)
        return CarbsOf(target), "enum"
class OwnerOfQuestion(QuestionTemplate):
    """
    Template for questions about the owner of a hotel.

    Ex: "who is the owner of Capital Hilton?"
    """

    regex = (
        Lemmas("who be") + Lemma("the") + Lemma("owner") + Pos("IN")
        + Hotel()
        + Question(Lemma("hotel")) + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``NameOf`` the ``OwnerOf`` the matched hotel, typed "enum"."""
        hotel_owner = OwnerOf(match.hotel)
        return NameOf(hotel_owner), "enum"
class Movie(Particle):
    """
    Particle for a movie mention.

    Matches an optional DT-tagged word followed by one or more words
    tagged NN, NNS, NNP or NNPS.
    """

    regex = (
        Question(Pos("DT"))
        + Plus(Pos("NN") | Pos("NNS") | Pos("NNP") | Pos("NNPS"))
    )

    def semantics(self, match):
        """Return the matched tokens unchanged."""
        tokens = match.words.tokens
        return tokens
class WhatPilot(QuestionTemplate):
    """
    Template for questions asking for the pilot of a vocabulary.

    Ex: "What is the pilot of foaf?"
    """

    regex = (
        Lemma("what") + Lemma("be") + Lemma("the") + Lemma("pilot")
        + Pos("IN") + Vocabulary() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.HasPilot`` of the matched vocabulary, typed "pilot"."""
        result = dsl.HasPilot(match.vocabulary)
        return result, "pilot"
class GradStudentQuestion(QuestionTemplate):
    """
    Template for questions about the number of graduate students in a
    university.

    Ex: "How many postgraduate students in York University?"
        "How many post grad students in University of Toronto?"
        "How many post graduate students in University of Toronto?"
        "Number of postgraduate students in York University?"
        "Number of post grad students in Harvard University?"
        "Number of post graduate students in Harvard University?"
    """

    regex = (
        # one-word spelling: "postgraduate student"
        (Lemmas("how many") | Lemmas("number of"))
        + Lemma("postgraduate") + Lemma("student")
        + Pos("IN") + University() + Question(Pos("."))
    ) | (
        # two-word spelling: "post grad student" / "post graduate student"
        (Lemmas("how many") | Lemmas("number of"))
        + Lemma("post") + (Lemma("grad") | Lemma("graduate"))
        + Lemma("student")
        + Pos("IN") + University() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``GradStudentOf`` the matched university, typed "literal"."""
        grad_students = GradStudentOf(match.university)
        return grad_students, "literal"
class WhatIsNamespaceQuestion(QuestionTemplate):
    """
    Template for questions asking for the namespace of a vocabulary.

    Ex: "what is the namespace of dcterms?"
    """

    regex = (
        Lemma("what") + Lemma("be") + Lemmas("the namespace")
        + Pos("IN") + Vocabulary() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.HasURI`` of the matched vocabulary, typed "url"."""
        namespace_uri = dsl.HasURI(match.vocabulary)
        return namespace_uri, "url"
class UndergradStudentQuestion(QuestionTemplate):
    """
    Template for questions about the number of undergraduate students in a
    university.

    Ex: "How many undergraduate students in York University?"
        "How many under grad students in University of Toronto?"
        "How many under graduate students in University of Toronto?"
        "Number of undergraduate students in York University?"
        "Number of under grad students in McGill University?"
        "Number of under graduate students in McGill University?"
    """

    regex = (
        # one-word spelling: "undergraduate student"
        (Lemmas("how many") | Lemmas("number of"))
        + Lemma("undergraduate") + Lemma("student")
        + Pos("IN") + University() + Question(Pos("."))
    ) | (
        # two-word spelling: "under grad student" / "under graduate student"
        (Lemmas("how many") | Lemmas("number of"))
        + Lemma("under") + (Lemma("grad") | Lemma("graduate"))
        + Lemma("student")
        + Pos("IN") + University() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``UnderGradStudentOf`` the matched university, typed "literal"."""
        undergrad_students = UnderGradStudentOf(match.university)
        return undergrad_students, "literal"
class WhatCategory(QuestionTemplate):
    """
    Template for questions asking for the category of a vocabulary.

    Ex: "What is the category of foaf?"
    """

    regex = (
        Lemma("what") + Lemma("be") + Lemma("the") + Lemma("category")
        + Pos("IN") + Vocabulary() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.HasCategory`` of the matched vocabulary, typed "category"."""
        result = dsl.HasCategory(match.vocabulary)
        return result, "category"
class NumberOfStaffQuestion(QuestionTemplate):
    """
    Template for questions about the number of staff in a university.

    Ex: "How many staff in York University?"
        "How many staff working in McGill University?"
        "Number of staff in University of Toronto?"
        "Number of staff working in University of Toronto?"
    """

    regex = (
        # "... staff <IN> <university>"
        (Lemmas("how many") | Lemmas("number of"))
        + Lemma("staff")
        + Pos("IN") + University() + Question(Pos("."))
    ) | (
        # "... staff work <IN> <university>"
        (Lemmas("how many") | Lemmas("number of"))
        + Lemma("staff") + Lemma("work")
        + Pos("IN") + University() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``StaffOf`` the matched university, typed "literal"."""
        staff_count = StaffOf(match.university)
        return staff_count, "literal"
class LanguageFamilyOfQuestion(QuestionTemplate):
    """
    Template for questions about the language family of a language.

    Ex: "What language family does Thai language belong to?"
        "What language family does Arabic language part of?"
        "What is the language family of English language?"
    """

    regex = (
        # "what language family <VBZ> <language> belong(s) to / part of"
        Lemma("what") + Lemma("language") + Lemma("family") + Pos("VBZ")
        + Language()
        + (
            (Lemma("belong") | Lemma("belongs")) + Pos("TO")
            | Lemma("part") + Pos("IN")
        )
        + Question(Pos("."))
    ) | (
        # "what be <DT> language family <IN> <language>"
        Lemma("what") + Lemma("be") + Pos("DT")
        + Lemma("language") + Lemma("family") + Pos("IN")
        + Language() + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``LabelOf`` the language family of the match, typed "enum"."""
        family = LanguageFamilyOf(match.language)
        return LabelOf(family), "enum"
class Course(Particle):
    """
    Particle for a course mention.

    Matches an optional DT-tagged word, the lemma "cmpe", then whatever
    the module-level ``course`` pattern matches.
    """

    regex = Question(Pos("DT")) + Lemma("cmpe") + course

    def interpret(self, match):
        """Return ``HasCourse`` for the tokens prefixed with "keyword:"."""
        prefixed = "keyword:" + match.words.tokens
        return HasCourse(prefixed)
class Thing(Particle):
    """
    Particle for a generic keyword.

    Matches either an optional JJ-tagged word followed by one word tagged
    NN, NNP or NNS, or a single VBN-tagged word.
    """

    regex = (
        Question(Pos("JJ")) + (Pos("NN") | Pos("NNP") | Pos("NNS"))
        | Pos("VBN")
    )

    def semantics(self, match):
        """Wrap the matched tokens in a ``HasKeyword`` expression."""
        tokens = match.words.tokens
        return HasKeyword(tokens)
class ListMoviesQuestion(QuestionTemplate): """ Ex: "list movies" """ prob_threshold = 0.95 tables = ["movies", "genre"] examples = ["list movies", "list popular movies"] examples_entities = [ "list <modifier>popular</modifier> <genre>action</genre> movies" ] # You must have an attribute called 'regex' # The Group option matches a movie object or a POS (e.g. POS(NN)) and associates it to the "target" name. This will create match.target object regex = Lemma("list") + Question(Lemma("recent")) + Question( Lemma("popular")) + Question(Genre()) + Group( Movie(), "target") #(Lemma("movie") | Lemma("film")) """ match obj in case of 'list movies' : {'_words': [list|list|NN|None, movies|movie|NNS|None], '_particles': {}, '_match': <refo.match.Match object at 0x7f2f1ab1f3d0>, '_j': None, '_i': None} match obj in case of 'lists movies' : {'_words': [lists|list|NNS|None, movies|movie|NNS|None], '_particles': {}, '_match': <refo.match.Match object at 0x7f2f1ab1f3d0>, '_j': None, '_i': None} As you can see in the second example, what changes is the words matched. It defines that the word lists (plural) was matched, lemmatized to list (singular general form), and it was a NNS form (plural) instead of NN (singular) at the time of the match. The other word matched was 'movies', lemmatized to 'movie', was a POS-tag NNS (plural) at the time of the match. 
""" def interpret(self, match): print('match') print(match.__dict__) print('match._match') print(match._match.__dict__) print('match.target') print(match.target.__dict__) # match.target exists just because of the Group(Movie(), "target") print('match.target.tokens') print(match.target.tokens) print('haskeyword') print(HasKeyword(match.target.tokens).__dict__) movie = IsMovie() movie_name = NameOf(movie) """ {'nodes': [[(u'rdf:type', u'dbpedia-owl:Film'), ('foaf:name', 1)]], 'fixedtyperelation': u'rdf:type', 'head': 0, 'fixedtype': u'dbpedia-owl:Film'} """ print('movie (ismovie)') print(movie.__dict__) """ {'nodes': [[(u'rdf:type', u'dbpedia-owl:Film'), ('foaf:name', 1)], []], 'head': 1} """ print('moviename (nameof(ismovie))') print(movie_name.__dict__) print('words are classes, not just simple texts') first_word = match.words[ 0] # {'lemma': u'list', 'token': u'list', 'pos': u'NN', 'prob': None} second_word = match.words[ 1] # {'lemma': u'movie', 'token': u'movies', 'pos': u'NNS', 'prob': None} print('first word') print(first_word.__dict__) print('second word') print(second_word.__dict__) print(match.target.tokens) matched_lemmas = [k.lemma for k in match.words] recent = u"recent" in matched_lemmas popular = u"popular" in matched_lemmas select_expressions = ["title"] if hasattr(match, 'genre'): tables = [u"genres"] condition_cols = [u"genre"] condition_values = [ ''.join(match.genre.nodes[0][1][1].split('"')[:-1]) ] else: tables = [u"title"] condition_cols = [] condition_values = [] generate_nodes_tables(movie_name, tables, select_expressions, condition_cols=condition_cols, condition_values=condition_values, popular=popular, recent=recent) movie_name.nodes[0] += " limit 10" #[u'title like "'+match.movie+'"'] print "nodes", movie_name.nodes print movie_name return movie_name, ("enum", "ListMoviesQuestion")
class Band(Particle):
    """
    Particle for a band mention.

    Matches an optional DT-tagged word followed by one or more words
    tagged NN or NNP.
    """

    regex = Question(Pos("DT")) + Plus(Pos("NN") | Pos("NNP"))

    def interpret(self, match):
        """Combine ``IsBand`` with ``HasKeyword`` of the title-cased tokens."""
        band_name = match.words.tokens.title()
        return IsBand() + HasKeyword(band_name)
class ActedOnQuestion(QuestionTemplate):
    """
    Template for questions asking which movies an actor appeared in.

    Ex: "List movies with Hugh Laurie"
        "Movies with Matt LeBlanc"
        "In what movies did Jennifer Aniston appear?"
        "Which movies did Mel Gibson starred?"
        "Movies starring Winona Ryder"
    """

    prob_threshold = 0.95
    tables = ["actors", "actresses"]
    examples = [
        "List movies with Hugh Laurie", "Movies with Matt LeBlanc",
        "In what movies did Jennifer Aniston appear?",
        "Which movies did Mel Gibson starred?",
        "Movies starring Winona Ryder"
    ]

    # Verbs that express "acted in".
    acted_on = (Lemma("appear") | Lemma("act") | Lemma("star")
                | Lemmas("play in"))
    # "[recent] movie/movies/film"
    movie = Question(
        Lemma("recent")) + (Lemma("movie") | Lemma("movies") | Lemma("film"))

    regex = (Question(Lemma("list")) + movie + Lemma("with") + Actor()) | \
            (Question(Pos("IN")) + (Lemma("what") | Lemma("which")) +
             movie + Lemma("do") + Actor() + acted_on + Question(Pos("."))) | \
            (Question(Pos("IN")) + Lemma("which") + movie + Lemma("do") +
             Actor() + acted_on) | \
            (Question(Lemma("list")) + movie + Lemma("star") + Actor()) | \
            Question(Lemmas("i would like to see")) + movie + Lemma("with") + \
            Actor() + Question(Pos("."))

    def interpret(self, match):
        """
        Build the SQL-fragment query for the movies of the matched actor.

        The actor's name is split on whitespace: a single word is matched
        against ``name`` only; otherwise the last word is matched against
        ``surname`` and the preceding words against ``name``. When the
        lemma "recent" is among the matched words, the query joins the
        movies table and adds a filter on ``b.year``.

        Returns a tuple of the filled-in ``NameOf`` expression and
        ``("enum", "ActedOnQuestion")``.
        """
        movie = IsMovie() + HasActor(match.actor)
        movie_name = NameOf(movie)

        # Presumably the node value is a quoted literal such as
        # '"Hugh Laurie"@en'; everything after the last double quote is
        # dropped -- TODO confirm against the Actor particle.
        actor_name = ''.join(match.actor.nodes[0][1][1].split('"')[:-1])
        names = actor_name.split()

        # NOTE(review): the condition is assembled by string concatenation
        # from the user's question; double quotes are removed above, but
        # nothing else is escaped.
        if len(names) == 1:
            # Index the single element: concatenating the list itself to a
            # string raises TypeError.
            condition = u' name like "' + names[0] + u'"'
        else:
            condition = (u' name like "' + u" ".join(names[:-1]) +
                         u'" and surname like "' + names[-1] + u'"')

        if u"recent" in [k.lemma for k in match.words]:
            movie_name.nodes = [condition + u' and b.year>YEAR(curdate())-2']
            movie_name.head = u"a.title"
            movie_name.tables = [
                "actors as a left join movies as b on(a.movie_id=b.id) ",
                "actresses as a left join movies as b on(a.movie_id=b.id) "
            ]
        else:
            movie_name.tables = ["actors", "actresses"]
            movie_name.head = "title,name,surname"
            movie_name.nodes = [condition]

        return movie_name, ("enum", "ActedOnQuestion")
def test_rule(Subject, Object):
    """
    Build the pattern ``Subject "(" Object "-" <rest>``.

    ``<rest>`` is ``Question(Star(Any()))``, i.e. an optional run of
    arbitrary items after the "-" token.
    """
    trailing = Question(Star(Any()))
    open_paren = Token("(")
    dash = Token("-")
    return Subject + open_paren + Object + dash + trailing
class MoviesKeywordQuestion(QuestionTemplate):
    """
    Template for requests for movies about a given keyword.

    Ex: "I'm in the mood for a samurai story"
        "Show me samurai movies"
        "List kung-fu movies"
    """

    prob_threshold = 0.95
    # TODO: left join with ratings for popularity
    examples = [
        "I'm in the mood for a samurai story", "Show me samurai movies",
        "List kung-fu movies"
    ]
    tables = ["genres", "keywords", "movies"]

    regex = ((Lemmas("show me") |
              (Question(Lemmas("tonight i feel like")) + Lemma("watch"))) +
             Question(Pos("DT")) +
             Question(Lemma("recent")) + Keyword() + Lemma("movie") +
             Question(Pos("."))) | \
            (Question(Lemmas("i ' m in the mood for a")) + Keyword() +
             (Question(Lemma("story")) | Question(Lemma("movie"))) +
             Question(Pos(".")))

    def interpret(self, match):
        """
        Build the SQL-fragment query for movies matching the keyword.

        A keyword containing a space is split on whitespace and each word
        becomes its own ``like`` test, OR-ed together. When the lemma
        "recent" is among the matched words, the query joins the movies
        table and adds a filter on ``b.year`` that applies to every
        keyword test.

        Returns a tuple of the filled-in ``DefinitionOf`` expression and
        ``("define", "MoviesKeywordQuestion")``.
        """
        definition = DefinitionOf(match.keyword)

        # Presumably the node value is a quoted literal such as
        # '"samurai"@en'; everything after the last double quote is
        # dropped -- TODO confirm against the Keyword particle.
        keyword_name = ''.join(match.keyword.nodes[0][1][1].split('"')[:-1])

        if " " in keyword_name:
            terms = keyword_name.split()
        else:
            terms = [keyword_name]
        if not terms:
            # Whitespace-only keyword: keep a single empty term so the
            # condition stays well-formed ('like "%%"').
            terms = [u""]

        # NOTE(review): conditions are assembled by string concatenation
        # from the user's question; double quotes are removed above, but
        # nothing else is escaped.
        def like_any(column):
            # OR together one 'lower(<column>) like "%term%"' test per term.
            return u" or ".join(
                u'lower(' + column + u') like "%' + term + u'%"'
                for term in terms)

        if any(word.lemma == u"recent" for word in match.words):
            condition = like_any(u"a.keyword")
            if len(terms) > 1:
                # AND binds tighter than OR in SQL; without parentheses the
                # year filter would apply to the last keyword test only.
                condition = u"(" + condition + u")"
            definition.nodes = [
                condition + u' and b.year>YEAR(curdate())-2'
            ]
            definition.head = u"a.title"
            definition.tables = [
                "keywords as a left join movies as b on(a.movie_id=b.id) "
            ]
        else:
            definition.nodes = [like_any(u"keyword")]
            definition.head = u"title"
            definition.tables = ["keywords"]

        return definition, ("define", "MoviesKeywordQuestion")
class TvShow(Particle):
    """
    Particle for a TV show mention.

    Matches one or more repetitions of an optional DT-tagged word followed
    by whatever the module-level ``nouns`` pattern matches.
    """

    regex = Plus(Question(Pos("DT")) + nouns)

    def semantics(self, match):
        """Combine ``IsTvShow`` with ``HasShowName`` of the matched tokens."""
        show_tokens = match.words.tokens
        return IsTvShow() + HasShowName(show_tokens)
class MilitaryConflict(Particle):
    """
    Particle for a military conflict mention.

    Matches an optional DT-tagged word followed by one or more items that
    are either a ``nouns`` match or an IN-tagged word.
    """

    regex = Question(Pos("DT")) + Plus(nouns | Pos("IN"))

    def interpret(self, match):
        """Combine ``IsMilitaryConflict`` with ``HasName`` of the tokens."""
        conflict_tokens = match.words.tokens
        return IsMilitaryConflict() + HasName(conflict_tokens)