Beispiel #1
0
class Disease(Particle):
    """Particle whose matched tokens are wrapped in ``HasDesease``.

    The pattern accepts either an optional "JJ"-tagged word followed by one
    word tagged "NN", "NNP" or "NNS", or a single "VBN"-tagged word.
    """

    regex = (
        (Question(Pos("JJ")) + (Pos("NN") | Pos("NNP") | Pos("NNS")))
        | Pos("VBN")
    )

    def interpret(self, match):
        """Return ``HasDesease`` built from the tokens of the matched words."""
        tokens = match.words.tokens
        return HasDesease(tokens)
class MilitaryConflict(Particle):
    """Particle made of an optional "DT"-tagged word followed by ``nouns``."""

    regex = (Question(Pos("DT")) + nouns)

    def interpret(self, match):
        """Combine ``IsMilitaryConflict`` with a label built from the tokens."""
        tokens = match.words.tokens
        conflict = IsMilitaryConflict()
        return conflict + LabelOfFixedDataRelation(tokens)
Beispiel #3
0
#!/usr/bin/python


from refo import Literal, Question, match
import re
import time

# Benchmark: match the pattern  a?{N} a{N}  against the string "a" * N,
# first with refo and then with the standard ``re`` module.
#
# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.

# You might want to try with N = 20, 30, 40, 100 to see what happens
N = 25

a = Literal("a")
string = "a" * N
regex = Question(a) * N + a * N
start = time.time()
m = match(regex, string)
end = time.time()
# Single-argument print() behaves the same on Python 2 and Python 3.
print("Refo finished in {0:.2} seconds".format(end - start))

# "(?:...)" is the non-capturing group syntax; the previous "(:?...)" was a
# typo that created a capturing group starting with an optional colon.
# The doubled braces are str.format escapes and yield a literal "{N}".
regex = "(?:a?){{{0}}}a{{{0}}}".format(N)
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print("Python re finished in {0:.2} seconds".format(end - start))
Beispiel #4
0
class Thing(Particle):
    """Particle whose matched tokens are wrapped in ``HasKeyword``.

    One or more repetitions of: an optional "JJ"-tagged word followed by a
    word tagged "NN", "NNP" or "NNS"; or a single "VBN"-tagged word.
    """

    regex = Plus(
        (Question(Pos("JJ")) + (Pos("NN") | Pos("NNP") | Pos("NNS")))
        | Pos("VBN")
    )

    def interpret(self, match):
        """Return ``HasKeyword`` built from the tokens of the matched words."""
        tokens = match.words.tokens
        return HasKeyword(tokens)
Beispiel #5
0
class SpeakersOfQuestion(QuestionTemplate):
    """
    Template for questions asking how many people speak a language.

    Sample questions:
        "How many people speaks English language?"
        "How many people speak Canadian French?"
        "How many people in the world can speak Arabic language?"
    """

    regex = (
        (
            Lemmas("how many") + Lemma("people")
            + (Lemma("speaks") | Lemma("speak"))
            + Language() + Question(Pos("."))
        )
        | (
            Lemmas("how many") + Lemma("people")
            + Pos("IN") + Pos("DT") + Lemma("world") + Lemma("can")
            + (Lemma("speak") | Lemma("speaks"))
            + Language() + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``SpeakersOf`` for the matched language, paired with "literal"."""
        speakers = SpeakersOf(match.language)
        return speakers, "literal"
Beispiel #6
0
class Movie(Particle):
    """Particle made of an optional "DT"-tagged word followed by ``nouns``."""

    regex = (Question(Pos("DT")) + nouns)

    def semantics(self, match):
        """Combine ``IsMovie`` with ``HasName`` built from the matched tokens."""
        tokens = match.words.tokens
        movie = IsMovie()
        return movie + HasName(tokens)
Beispiel #7
0

from refo import Literal, Question, match
import re
import time

# Benchmark: match the pattern  a?{N} a{N}  against the string "a" * N,
# first with refo and then with the standard ``re`` module.
#
# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.

# You might want to try with N = 20, 30, 40, 100 to see what happens
N = 30

a = Literal("a")
string = "a" * N
regex = Question(a) * N + a * N
start = time.time()
m = match(regex, string)
end = time.time()
# Single-argument print() behaves the same on Python 2 and Python 3.
print("Refo finished in {0:.2} seconds".format(end - start))

# "(?:...)" is the non-capturing group syntax; the previous "(:?...)" was a
# typo that created a capturing group starting with an optional colon.
# The doubled braces are str.format escapes and yield a literal "{N}".
regex = "(?:a?){{{0}}}a{{{0}}}".format(N)
# Formatted into one string so the output is "Trying <pattern>" on both
# Python 2 and Python 3 (a two-argument print() would show a tuple on 2).
print("Trying {0}".format(regex))
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print("Python re finished in {0:.2} seconds".format(end - start))
Beispiel #8
0
class WhereIsHomePageQuestion(QuestionTemplate):
    """
    Template for "where to find <vocabulary> documentation" questions.

    Ex: "Where to find foaf documentation?"
    """

    regex = (
        Lemmas("where to")
        + Lemma("find")
        + Vocabulary()
        + Lemma("documentation")
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.IsHomePageOf`` for the matched vocabulary, paired with "url"."""
        return dsl.IsHomePageOf(match.vocabulary), "url"
Beispiel #9
0
class ProteinOfQuestion(QuestionTemplate):
    """
    Template for questions about the protein in a species.

    Sample questions:
        "How much protein in an apple?"
        "How much protein an Apple have?"
        "Do Apple have protein?"
    """

    regex = (
        (
            Lemmas("how much") + Lemma("protein") + Pos("IN") + Pos("DT")
            + Group(Pos("NNP"), 'species') + Question(Pos("."))
        )
        | (
            Lemmas("how much") + Lemma("protein") + Pos("DT")
            + Group(Pos("NNP"), 'species') + Lemma("have")
            + Question(Pos("."))
        )
        | (
            Lemma("do") + Group(Pos("NNP"), 'species') + Lemma("have")
            + Lemma("protein") + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``ProteinOf`` the matched species, paired with "enum"."""
        tokens = match.species.tokens
        matched_species = IsSpecies() + HasKeyword(tokens)
        return ProteinOf(matched_species), "enum"
Beispiel #10
0
class SugarOfQuestion(QuestionTemplate):
    """
    Template for questions about the sugar in a species.

    Sample questions:
        "How much sugar in an Apple?"
        "How much sugar an Apple have?"
        "Do Apple have sugar?"
    """

    regex = (
        (
            Lemmas("how much") + Lemma("sugar") + Pos("IN") + Pos("DT")
            + Group(Pos("NNP"), 'species') + Question(Pos("."))
        )
        | (
            Lemmas("how much") + Lemma("sugar") + Pos("DT")
            + Group(Pos("NNP"), 'species') + Lemma("have")
            + Question(Pos("."))
        )
        | (
            Lemma("do") + Group(Pos("NNP"), 'species') + Lemma("have")
            + Lemma("sugar") + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``SugarOf`` the matched species, paired with "enum"."""
        tokens = match.species.tokens
        matched_species = IsSpecies() + HasKeyword(tokens)
        return SugarOf(matched_species), "enum"
Beispiel #11
0
class CarbsOfQuestion(QuestionTemplate):
    """
    Template for questions about the carbs in a species.

    Sample questions:
        "How much carbs in an apple?"
        "How much carbs an Apple have?"
        "Do Apple have carbs?"
    """

    regex = (
        (
            Lemmas("how much") + Lemma("carbs") + Pos("IN") + Pos("DT")
            + Group(Pos("NNP"), 'species') + Question(Pos("."))
        )
        | (
            Lemmas("how much") + Lemma("carbs") + Pos("DT")
            + Group(Pos("NNP"), 'species') + Lemma("have")
            + Question(Pos("."))
        )
        | (
            Lemma("do") + Group(Pos("NNP"), 'species') + Lemma("have")
            + Lemma("carbs") + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``CarbsOf`` the matched species, paired with "enum"."""
        tokens = match.species.tokens
        matched_species = IsSpecies() + HasKeyword(tokens)
        return CarbsOf(matched_species), "enum"
Beispiel #12
0
class OwnerOfQuestion(QuestionTemplate):
    """
    Template for questions about the owner of a hotel.

    Ex: "who is the owner of Capital Hilton?"
    """

    regex = (
        Lemmas("who be")
        + Lemma("the")
        + Lemma("owner")
        + Pos("IN")
        + Hotel()
        + Question(Lemma("hotel"))
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``NameOf`` the owner of the matched hotel, paired with "enum"."""
        hotel_owner = OwnerOf(match.hotel)
        return NameOf(hotel_owner), "enum"
Beispiel #13
0
        class Movie(Particle):
            """Particle: optional "DT" word, then one or more NN/NNS/NNP/NNPS words."""

            regex = (
                Question(Pos("DT"))
                + Plus(Pos("NN") | Pos("NNS") | Pos("NNP") | Pos("NNPS"))
            )

            def semantics(self, match):
                """Return the tokens of the matched words unchanged."""
                matched_words = match.words
                return matched_words.tokens
Beispiel #14
0
class WhatPilot(QuestionTemplate):
    """
    Template for "what is the pilot of <vocabulary>" questions.

    Ex: "What is the pilot of foaf?"
    """

    regex = (
        Lemma("what")
        + Lemma("be")
        + Lemma("the")
        + Lemma("pilot")
        + Pos("IN")
        + Vocabulary()
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.HasPilot`` for the matched vocabulary, paired with "pilot"."""
        return dsl.HasPilot(match.vocabulary), "pilot"
Beispiel #15
0
class GradStudentQuestion(QuestionTemplate):
    """
    Template for questions about the number of graduate students in a
    university.

    Sample questions:
        "How many postgraduate students in York University?"
        "How many post grad students in University of Toronto?"
        "How many post graduate students in University of Toronto?"
        "Number of postgraduate students in York University?"
        "Number of post grad students in Harvard University?"
        "Number of post graduate students in Harvard University?"
    """

    regex = (
        (
            (Lemmas("how many") | Lemmas("number of"))
            + Lemma("postgraduate") + Lemma("student")
            + Pos("IN") + University() + Question(Pos("."))
        )
        | (
            (Lemmas("how many") | Lemmas("number of"))
            + Lemma("post") + (Lemma("grad") | Lemma("graduate"))
            + Lemma("student")
            + Pos("IN") + University() + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``GradStudentOf`` the matched university, paired with "literal"."""
        grad_students = GradStudentOf(match.university)
        return grad_students, "literal"
Beispiel #16
0
class WhatIsNamespaceQuestion(QuestionTemplate):
    """
    Template for "what is the namespace of <vocabulary>" questions.

    Ex: "what is the namespace of dcterms?"
    """

    regex = (
        Lemma("what")
        + Lemma("be")
        + Lemmas("the namespace")
        + Pos("IN")
        + Vocabulary()
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.HasURI`` for the matched vocabulary, paired with "url"."""
        return dsl.HasURI(match.vocabulary), "url"
Beispiel #17
0
class UndergradStudentQuestion(QuestionTemplate):
    """
    Template for questions about the number of undergraduate students in a
    university.

    Sample questions:
        "How many undergraduate students in York University?"
        "How many under grad students in University of Toronto?"
        "How many under graduate students in University of Toronto?"
        "Number of undergraduate students in York University?"
        "Number of under grad students in McGill University?"
        "Number of under graduate students in McGill University?"
    """

    regex = (
        (
            (Lemmas("how many") | Lemmas("number of"))
            + Lemma("undergraduate") + Lemma("student")
            + Pos("IN") + University() + Question(Pos("."))
        )
        | (
            (Lemmas("how many") | Lemmas("number of"))
            + Lemma("under") + (Lemma("grad") | Lemma("graduate"))
            + Lemma("student")
            + Pos("IN") + University() + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``UnderGradStudentOf`` the matched university, paired with "literal"."""
        undergrad_students = UnderGradStudentOf(match.university)
        return undergrad_students, "literal"
Beispiel #18
0
class WhatCategory(QuestionTemplate):
    """
    Template for "what is the category of <vocabulary>" questions.

    Ex: "What is the category of foaf?"
    """

    regex = (
        Lemma("what")
        + Lemma("be")
        + Lemma("the")
        + Lemma("category")
        + Pos("IN")
        + Vocabulary()
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return ``dsl.HasCategory`` for the matched vocabulary, paired with "category"."""
        return dsl.HasCategory(match.vocabulary), "category"
Beispiel #19
0
class NumberOfStaffQuestion(QuestionTemplate):
    """
    Template for questions about the number of staff in a university.

    Sample questions:
        "How many staff in York University?"
        "How many staff working in McGill University?"
        "Number of staff in University of Toronto?"
        "Number of staff working in University of Toronto?"
    """

    regex = (
        (
            (Lemmas("how many") | Lemmas("number of"))
            + Lemma("staff")
            + Pos("IN") + University() + Question(Pos("."))
        )
        | (
            (Lemmas("how many") | Lemmas("number of"))
            + Lemma("staff") + Lemma("work")
            + Pos("IN") + University() + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``StaffOf`` the matched university, paired with "literal"."""
        return StaffOf(match.university), "literal"
Beispiel #20
0
class LanguageFamilyOfQuestion(QuestionTemplate):
    """
    Template for questions about the language family of a language.

    Sample questions:
        "What language family does Thai language belong to?"
        "What language family does Arabic language part of?"
        "What is the language family of English language?"
    """

    regex = (
        (
            Lemma("what") + Lemma("language") + Lemma("family")
            + Pos("VBZ") + Language()
            + (
                ((Lemma("belong") | Lemma("belongs")) + Pos("TO"))
                | (Lemma("part") + Pos("IN"))
            )
            + Question(Pos("."))
        )
        | (
            Lemma("what") + Lemma("be") + Pos("DT")
            + Lemma("language") + Lemma("family")
            + Pos("IN") + Language() + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Return ``LabelOf`` the matched language's family, paired with "enum"."""
        family = LanguageFamilyOf(match.language)
        return LabelOf(family), "enum"
Beispiel #21
0
class Course(Particle):
    """Particle: optional "DT" word, the lemma "cmpe", then ``course``."""

    regex = (Question(Pos("DT")) + Lemma("cmpe") + course)

    def interpret(self, match):
        """Return ``HasCourse`` for the matched tokens prefixed with "keyword:"."""
        prefixed_tokens = "keyword:" + match.words.tokens
        return HasCourse(prefixed_tokens)
Beispiel #22
0
class Thing(Particle):
    """Particle whose matched tokens are wrapped in ``HasKeyword``.

    The pattern accepts either an optional "JJ"-tagged word followed by one
    word tagged "NN", "NNP" or "NNS", or a single "VBN"-tagged word.
    """

    regex = (
        (Question(Pos("JJ")) + (Pos("NN") | Pos("NNP") | Pos("NNS")))
        | Pos("VBN")
    )

    def semantics(self, match):
        """Return ``HasKeyword`` built from the tokens of the matched words."""
        tokens = match.words.tokens
        return HasKeyword(tokens)
Beispiel #23
0
class ListMoviesQuestion(QuestionTemplate):
    """
    Question template for requests to list movies.

    The pattern is the lemma "list", an optional "recent", an optional
    "popular", an optional ``Genre()`` particle and a ``Movie()`` particle
    captured under the name "target".

    Ex: "list movies"

    NOTE: ``interpret`` mixes ``print(...)`` calls with Python 2 ``print``
    statements, so this class only parses under Python 2.
    """
    prob_threshold = 0.95
    tables = ["movies", "genre"]
    examples = ["list movies", "list popular movies"]
    examples_entities = [
        "list <modifier>popular</modifier> <genre>action</genre> movies"
    ]

    # You must have an attribute called 'regex'
    # The Group option matches a movie object or a POS (e.g. POS(NN)) and associates it to the "target" name. This will create match.target object
    regex = Lemma("list") + Question(Lemma("recent")) + Question(
        Lemma("popular")) + Question(Genre()) + Group(
            Movie(), "target")  #(Lemma("movie") | Lemma("film"))
    """
    match obj in case of 'list movies' :
    {'_words': [list|list|NN|None, movies|movie|NNS|None], '_particles': {}, '_match': <refo.match.Match object at 0x7f2f1ab1f3d0>, '_j': None, '_i': None}

    match obj in case of 'lists movies' :
    {'_words': [lists|list|NNS|None, movies|movie|NNS|None], '_particles': {}, '_match': <refo.match.Match object at 0x7f2f1ab1f3d0>, '_j': None, '_i': None}

    As you can see in the second example, what changes is the words matched.
    It defines that the word lists (plural) was matched, lemmatized to list (singular general form), and it was a NNS form (plural) instead of NN (singular) at the time of the match.

    The other word matched was 'movies', lemmatized to 'movie', was a POS-tag NNS (plural) at the time of the match.
    """
    def interpret(self, match):
        """Build the movie-name query for a matched "list movies" question.

        Prints several debugging dumps of ``match`` and of the intermediate
        expressions, then calls ``generate_nodes_tables`` on
        ``NameOf(IsMovie())`` and appends " limit 10" to its first node.

        Returns a tuple ``(movie_name, ("enum", "ListMoviesQuestion"))``.
        """
        # --- Debug output: internals of the match object -------------------
        print('match')
        print(match.__dict__)

        print('match._match')
        print(match._match.__dict__)

        print('match.target')
        print(match.target.__dict__)

        # match.target exists just because of the Group(Movie(), "target")
        print('match.target.tokens')
        print(match.target.tokens)

        print('haskeyword')
        print(HasKeyword(match.target.tokens).__dict__)

        # Expression that is eventually returned (after being modified below).
        movie = IsMovie()
        movie_name = NameOf(movie)
        """
        {'nodes': [[(u'rdf:type', u'dbpedia-owl:Film'), ('foaf:name', 1)]], 'fixedtyperelation': u'rdf:type', 'head': 0, 'fixedtype': u'dbpedia-owl:Film'}
        """
        print('movie (ismovie)')
        print(movie.__dict__)
        """
        {'nodes': [[(u'rdf:type', u'dbpedia-owl:Film'), ('foaf:name', 1)], []], 'head': 1}
        """
        print('moviename (nameof(ismovie))')
        print(movie_name.__dict__)

        # --- Debug output: the first two matched words ---------------------
        print('words are classes, not just simple texts')
        first_word = match.words[
            0]  # {'lemma': u'list', 'token': u'list', 'pos': u'NN', 'prob': None}
        second_word = match.words[
            1]  # {'lemma': u'movie', 'token': u'movies', 'pos': u'NNS', 'prob': None}
        print('first word')
        print(first_word.__dict__)
        print('second word')
        print(second_word.__dict__)

        print(match.target.tokens)

        # Flags for the optional modifiers: true when the corresponding lemma
        # appears among the matched words.
        matched_lemmas = [k.lemma for k in match.words]
        recent = u"recent" in matched_lemmas
        popular = u"popular" in matched_lemmas

        select_expressions = ["title"]

        # Choose table and filter depending on whether a genre was matched.
        # Note that this local ``tables`` is unrelated to the class attribute
        # of the same name.
        if hasattr(match, 'genre'):
            tables = [u"genres"]
            condition_cols = [u"genre"]
            # Split the value on double quotes, drop the piece after the last
            # quote and join the rest, i.e. remove the quotes and any suffix.
            # NOTE(review): presumably nodes[0][1][1] is a quoted literal
            # holding the genre name -- confirm against Genre().
            condition_values = [
                ''.join(match.genre.nodes[0][1][1].split('"')[:-1])
            ]
        else:
            tables = [u"title"]
            condition_cols = []
            condition_values = []
        # generate_nodes_tables is defined elsewhere; its return value is not
        # used here, so it presumably mutates movie_name in place -- confirm.
        generate_nodes_tables(movie_name,
                              tables,
                              select_expressions,
                              condition_cols=condition_cols,
                              condition_values=condition_values,
                              popular=popular,
                              recent=recent)
        # Appends a limit to the first node.
        # NOTE(review): assumes nodes[0] is a string at this point -- confirm.
        movie_name.nodes[0] += " limit 10"  #[u'title like "'+match.movie+'"']
        print "nodes", movie_name.nodes
        print movie_name
        return movie_name, ("enum", "ListMoviesQuestion")
Beispiel #24
0
class Band(Particle):
    """Particle: optional "DT" word, then one or more "NN"/"NNP" words."""

    regex = (Question(Pos("DT")) + Plus(Pos("NN") | Pos("NNP")))

    def interpret(self, match):
        """Combine ``IsBand`` with ``HasKeyword`` of the title-cased tokens."""
        title_cased = match.words.tokens.title()
        band = IsBand()
        return band + HasKeyword(title_cased)
Beispiel #25
0
class ActedOnQuestion(QuestionTemplate):
    """
    Question template for requests for the movies an actor appears in.

    Ex: "List movies with Hugh Laurie"
        "Movies with Matt LeBlanc"
        "In what movies did Jennifer Aniston appear?"
        "Which movies did Mel Gibson starred?"
        "Movies starring Winona Ryder"
    """
    prob_threshold = 0.95
    tables = ["actors", "actresses"]
    examples = [
        "List movies with Hugh Laurie", "Movies with Matt LeBlanc",
        "In what movies did Jennifer Aniston appear?",
        "Which movies did Mel Gibson starred?", "Movies starring Winona Ryder"
    ]

    # Sub-patterns shared by the alternatives of ``regex`` below.
    acted_on = (Lemma("appear") | Lemma("act") | Lemma("star")
                | Lemmas("play in"))
    movie = Question(
        Lemma("recent")) + (Lemma("movie") | Lemma("movies") | Lemma("film"))
    regex = (Question(Lemma("list")) + movie + Lemma("with") + Actor()) | \
            (Question(Pos("IN")) + (Lemma("what") | Lemma("which")) +
             movie + Lemma("do") + Actor() + acted_on + Question(Pos("."))) | \
            (Question(Pos("IN")) + Lemma("which") + movie + Lemma("do") +
             Actor() + acted_on) | \
            (Question(Lemma("list")) + movie + Lemma("star") + Actor())|\
            Question(Lemmas("i would like to see"))+movie+ Lemma("with") +\
            Actor()+ Question(Pos("."))

    def interpret(self, match):
        """Build the movie-title query for the matched actor.

        The actor's name is split on whitespace: the last word is compared
        with ``surname`` and the remaining words with ``name``; a one-word
        name is compared with ``name`` only. When the lemma "recent" is
        among the matched words, the query joins the movies table and adds
        a year filter.

        Returns a tuple ``(movie_name, ("enum", "ActedOnQuestion"))``.
        """
        movie = IsMovie() + HasActor(match.actor)
        movie_name = NameOf(movie)
        # Split on double quotes, drop the piece after the last quote and
        # join the rest, i.e. strip the quotes around the actor's name.
        name_dir = ''.join(match.actor.nodes[0][1][1].split('"')[:-1])
        names = name_dir.split()

        # NOTE(security): the name comes from the user's question and is
        # concatenated into the SQL condition without escaping. The ``nodes``
        # interface only carries strings, so this cannot be parameterized
        # here; the consumer of ``nodes`` must treat it as untrusted.
        if len(names) == 1:
            # ``names`` is a list: use its only element. Concatenating the
            # list itself (as the previous code did) raises TypeError.
            name_filter = u' name like "' + names[0] + u'"'
        else:
            name_filter = (u' name like "' + u" ".join(names[:-1]) +
                           u'" and surname like "' + names[-1] + u'"')

        if u"recent" in [k.lemma for k in match.words]:
            movie_name.nodes = [
                name_filter + u' and  b.year>YEAR(curdate())-2'
            ]
            # A preliminary head of "a.name,a.surname,a.title" used to be
            # assigned first and was always overwritten; only this value
            # ever took effect.
            movie_name.head = u"a.title"
            movie_name.tables = [
                "actors as a left join movies as b on(a.movie_id=b.id) ",
                "actresses as a left join movies as b on(a.movie_id=b.id) "
            ]
        else:
            movie_name.tables = ["actors", "actresses"]
            movie_name.head = "title,name,surname"
            movie_name.nodes = [name_filter]
        return movie_name, ("enum", "ActedOnQuestion")
Beispiel #26
0
 def test_rule(Subject, Object):
     """Return ``Subject "(" Object "-"`` followed by an optional run of any tokens."""
     trailing = Question(Star(Any()))
     prefix = Subject + Token("(") + Object + Token("-")
     return prefix + trailing
Beispiel #27
0
class MoviesKeywordQuestion(QuestionTemplate):
    """
    Question template for requests for movies about a keyword.

    Ex: "I'm in the mood for a samurai story"
        "Show me samurai movies"
        "List kung-fu movies"
    """
    prob_threshold = 0.95
    # TODO: left join with ratings for popularity
    examples = [
        "I'm in the mood for a samurai story", "Show me samurai movies",
        "List kung-fu movies"
    ]

    tables = ["genres", "keywords", "movies"]

    regex = (
        (
            (Lemmas("show me")
             | (Question(Lemmas("tonight i feel like")) + Lemma("watch")))
            + Question(Pos("DT"))
            + Question(Lemma("recent"))
            + Keyword()
            + Lemma("movie")
            + Question(Pos("."))
        )
        | (
            Question(Lemmas("i ' m in the mood for a"))
            + Keyword()
            + (Question(Lemma("story")) | Question(Lemma("movie")))
            + Question(Pos("."))
        )
    )

    def interpret(self, match):
        """Build the keyword query for the matched question.

        Each word of the keyword becomes a ``like "%word%"`` test and the
        tests are OR-ed together. When the lemma "recent" is among the
        matched words, the query joins the movies table and the whole OR
        group is additionally restricted by year.

        Returns a tuple ``(definition, ("define", "MoviesKeywordQuestion"))``.
        """
        definition = DefinitionOf(match.keyword)

        # Split on double quotes, drop the piece after the last quote and
        # join the rest, i.e. strip the quotes around the keyword.
        keyword_name = ''.join(match.keyword.nodes[0][1][1].split('"')[:-1])
        # A keyword containing a space is searched word by word.
        if u" " in keyword_name:
            terms = keyword_name.split()
        else:
            terms = [keyword_name]
        if not terms:
            # Whitespace-only keyword: keep one empty term so the generated
            # condition is still well formed.
            terms = [u""]

        # NOTE(security): the keyword comes from the user's question and is
        # concatenated into the SQL condition without escaping. The ``nodes``
        # interface only carries strings, so this cannot be parameterized
        # here; the consumer of ``nodes`` must treat it as untrusted.
        if u"recent" in [k.lemma for k in match.words]:
            any_term = u' or '.join(
                u'lower(a.keyword) like "%' + term + u'%"' for term in terms)
            # Parenthesize the OR group: in SQL, AND binds tighter than OR,
            # so without the parentheses the year filter only applied to the
            # last keyword.
            definition.nodes = [
                u'(' + any_term + u') and  b.year>YEAR(curdate())-2'
            ]
            definition.head = u"a.title"
            definition.tables = [
                "keywords as a left join movies as b on(a.movie_id=b.id) "
            ]
        else:
            definition.nodes = [
                u' or '.join(
                    u'lower(keyword) like "%' + term + u'%"'
                    for term in terms)
            ]
            definition.head = u"title"
            definition.tables = ["keywords"]
        return definition, ("define", "MoviesKeywordQuestion")
Beispiel #28
0
class TvShow(Particle):
    """Particle: one or more repetitions of an optional "DT" word plus ``nouns``."""

    regex = Plus(
        Question(Pos("DT")) + nouns
    )

    def semantics(self, match):
        """Combine ``IsTvShow`` with ``HasShowName`` built from the matched tokens."""
        tokens = match.words.tokens
        show = IsTvShow()
        return show + HasShowName(tokens)
Beispiel #29
0
class MilitaryConflict(Particle):
    """Particle: optional "DT" word, then one or more of ``nouns`` or "IN" words."""

    regex = (
        Question(Pos("DT"))
        + Plus(nouns | Pos("IN"))
    )

    def interpret(self, match):
        """Combine ``IsMilitaryConflict`` with ``HasName`` built from the matched tokens."""
        tokens = match.words.tokens
        conflict = IsMilitaryConflict()
        return conflict + HasName(tokens)