class AssetsQuestion(QuestionTemplate):
    """
    Template for questions asking how much assets a company has.

    Ex: "How much assets does Google have?"
    """

    regex = (
        Pos("WRB")
        + Plus(Lemma("much") | Lemma("many"))
        + Plus(Lemma("asset") | Lemma("assets"))
        + Lemma("do")
        + Group(Pos("NNP"), 'company')
        + Lemma("have")
        + Pos(".")
    )

    def interpret(self, match):
        """Return the assets expression for the matched company and "enum"."""
        company_name = match.company.tokens
        # Restrict to companies carrying the matched keyword, then ask for
        # their assets.
        return AssetsOf(IsCompany() + HasKeyword(company_name)), "enum"
class Hotel(Particle):
    """Particle for a hotel name: a run of CD / JJ / NNS / DT / NNP tokens."""

    # Pos("NNP") used to appear three times in this alternation; repeating an
    # alternative adds nothing, so every tag is now listed exactly once.
    # NOTE(review): the duplicates may have been meant as other tags
    # (e.g. "NNPS") -- confirm with the author before widening the pattern.
    regex = Plus(
        Pos("CD") | Pos("JJ") | Pos("NNS") | Pos("DT") | Pos("NNP")
    )

    def interpret(self, match):
        """Return an IsHotel expression constrained by the matched tokens."""
        hotel_name = match.words.tokens
        return IsHotel() + HasKeyword(hotel_name)
class Restaurant(Particle):
    """Particle for a restaurant name: NNS / NNP / NNPS / DT / CD tokens."""

    regex = Plus(
        Pos("NNS")
        | Pos("NNP")
        | Pos("NNPS")
        | Pos("DT")
        | Pos("CD")
    )

    def interpret(self, match):
        """Return an IsRestaurant expression keyed on the matched tokens."""
        restaurant_name = match.words.tokens
        return IsRestaurant() + HasKeyword(restaurant_name)
class Thing(Particle):
    """
    Generic particle: a run of items, each being either an optional JJ
    followed by one of NN / NNP / NNS, or a single VBN token.
    """

    # "+" binds tighter than "|", so the adjective+noun sequence is one
    # alternative and VBN the other; the parentheses only make that explicit.
    regex = Plus(
        (Question(Pos("JJ")) + (Pos("NN") | Pos("NNP") | Pos("NNS")))
        | Pos("VBN")
    )

    def interpret(self, match):
        """Return a keyword constraint built from the matched tokens."""
        matched_tokens = match.words.tokens
        return HasKeyword(matched_tokens)
def was_born_and_mentions_place(Subject, Object):
    """
    Rule: Subject "was born" <IN> <place names> <IN> Object, optional ".".

    Ex: Nasser Sharify was born in Tehran, Iran, in 1925.
    """
    # One or more proper nouns, each optionally followed by a comma.
    place_names = Plus(Pos("NNP") + Question(Token(",")))

    pattern = Subject + Token("was born") + Pos("IN")
    pattern = pattern + place_names + Pos("IN") + Object
    return pattern + Question(Pos("."))
def mentions_real_name(Subject, Object):
    """
    Rule: Subject "born" <proper nouns> <IN> Object, then anything.

    Ex: Harry Pilling, born Ashtonunder-Lyne, Lancashire on 2 February 1943,
        played ...
    """
    trailing = Star(Any())
    # One or more proper nouns, each optionally followed by a comma.
    proper_nouns = Plus(Pos("NNP") + Question(Token(",")))

    pattern = Subject + Token("born") + proper_nouns
    pattern = pattern + Pos("IN") + Object
    return pattern + trailing
class Language(Particle):
    """Particle for a language name: a run of JJ / NN / NNP / NNS tokens."""

    regex = Plus(
        Pos("JJ")
        | Pos("NN")
        | Pos("NNP")
        | Pos("NNS")
    )

    def interpret(self, match):
        """Return an IsLanguage expression keyed on the matched tokens."""
        language_name = match.words.tokens
        return IsLanguage() + HasKeyword(language_name)
def incorrect_labeling_of_place_as_person(Subject, Object):
    """
    Rule: anything, <proper nouns> "of" Subject, anything.

    Ex: Sophie Christiane of Wolfstein (24 October 24, 1667 - 23 August 1737)

    Wolfstein is a *place*, not a *person*.

    Object is accepted for signature compatibility but is not part of the
    returned pattern.
    """
    filler = Star(Any())
    # One or more proper nouns, each optionally followed by a comma.
    person_name = Plus(Pos("NNP") + Question(Token(",")))

    pattern = filler + person_name + Token("of")
    return pattern + Subject + filler
class Location(Particle):
    """Particle for a location: DT / IN / NN / NNS / NNP / NNPS tokens."""

    regex = Plus(
        Pos("DT")
        | Pos("IN")
        | Pos("NN")
        | Pos("NNS")
        | Pos("NNP")
        | Pos("NNPS")
    )

    def interpret(self, match):
        """Return an IsLocation expression keyed on the title-cased tokens."""
        titled_name = match.words.tokens.title()
        return IsLocation() + HasKeyword(titled_name)
class Vocabulary(Particle):
    """Particle for a vocabulary term: NN / NNS / FW / DT / JJ / VBN tokens."""

    regex = Plus(
        Pos("NN")
        | Pos("NNS")
        | Pos("FW")
        | Pos("DT")
        | Pos("JJ")
        | Pos("VBN")
    )

    def interpret(self, match):
        """Return a dsl.IsVocab expression keyed on the matched tokens."""
        term = match.words.tokens
        return dsl.IsVocab() + HasKeyword(term)
def was_born_and_mentions_place_2(Subject, Object):
    """
    Rule: Subject "was born" <IN> Object <IN> <place names>, then anything.

    Ex: Theodone C. Hu was born in 1872 in Huangpu town, Haizhu District,
        Guangzhou, Guangdong, China.
    """
    trailing = Star(Any())
    # One or more proper nouns, each optionally followed by a comma.
    place_names = Plus(Pos("NNP") + Question(Token(",")))

    pattern = Subject + Token("was born") + Pos("IN") + Object
    pattern = pattern + Pos("IN") + place_names
    return pattern + trailing
class Person(Particle):
    """Particle for a person: NN / NNS / NNP / NNPS / JJ tokens."""

    regex = Plus(
        Pos("NN")
        | Pos("NNS")
        | Pos("NNP")
        | Pos("NNPS")
        | Pos("JJ")
    )

    def interpret(self, match):
        """
        Record the matched name on ``pronouns`` and return the person
        expression (IsPerson constrained by the matched keyword).
        """
        person_name = match.words.tokens
        # Store the name under both "his" and "her" on the shared pronouns
        # object (assigned in that order).
        pronouns.his = pronouns.her = person_name
        return IsPerson() + HasKeyword(person_name)
class Person(Particle):
    """Particle for a person: a run of NN / NNS / NNP / NNPS tokens."""

    regex = Plus(
        Pos("NN")
        | Pos("NNS")
        | Pos("NNP")
        | Pos("NNPS")
    )

    def interpret(self, match):
        """Return an IsPerson expression keyed on the matched tokens."""
        person_name = match.words.tokens
        return IsPerson() + HasKeyword(person_name)
def born_date_and_place_in_parenthesis(Subject, Object):
    """
    Rule: Subject, optional leftover proper nouns, then a parenthesis holding
    "Born"/"born", Object, a comma and place names; anything may follow.

    Ex: Gary Sykes (Born 13 February 1984) is a British super featherweight
        boxer.
    """
    trailing = Star(Any())
    born_word = Token("Born") | Token("born")
    # Proper nouns that may remain between the subject and the parenthesis.
    leftover_names = Star(Pos("NNP"))
    # One or more proper nouns, each optionally followed by a comma.
    place_names = Plus(Pos("NNP") + Question(Token(",")))

    pattern = Subject + leftover_names + Pos("-LRB-")
    pattern = pattern + born_word + Object + Token(",")
    pattern = pattern + place_names + Pos("-RRB-")
    return pattern + trailing
def parse_element_into_books(html_elements):
    """
    Group a flat sequence of HTML elements into Book objects.

    A book is one element whose ``class`` attribute starts with "bookMain"
    followed by one or more elements whose ``class`` starts with
    "highlightRow".

    :param html_elements: sliceable sequence of elements exposing
        ``get('class')``.
    :return: list of ``Book`` instances, one per matched run, in match order.
    """
    # Based on https://github.com/machinalis/refo/blob/master/examples/xml_reader.py

    def _class_starts_with(prefix):
        """Build a predicate testing an element's class-attribute prefix."""
        def predicate(elem):
            # get('class') yields None for elements without the attribute;
            # treat those as non-matching instead of raising AttributeError.
            return (elem.get('class') or '').startswith(prefix)
        return predicate

    regex = Group(
        Predicate(_class_starts_with('bookMain'))
        + Plus(Predicate(_class_starts_with('highlightRow'))),
        'book',
    )

    # Each match exposes the (start, end) span of the 'book' group.
    spans = [found['book'] for found in finditer(regex, html_elements)]
    return [Book(html_elements[start:end]) for start, end in spans]
class RiversInCountry(QuestionTemplate):
    """
    Template for questions about which rivers flow in a country.

    Ex: "Which river flows in India?"

    The leading WDT word and the trailing punctuation are both optional:
    all four combinations are listed in ``regex``.
    """

    regex1 = Pos('WDT')
    regex2 = (
        Lemma('river')
        + Plus(Pos('VBZ') | Pos('VBP'))
        + Pos('IN')
        + Country()
    )
    regex = (
        (regex1 + regex2 + Pos('.'))
        | regex2
        | (regex2 + Pos('.'))
        | (regex1 + regex2)
    )

    def interpret(self, match):
        """
        Log that the template matched.

        NOTE(review): no expression is built yet; this returns None.
        """
        # Parenthesized so it parses on Python 3 as well; with a single
        # string argument the output is unchanged on Python 2.
        print('parsed RiverInCountry')
        return None
class WhereIsQuestion(QuestionTemplate):
    """
    Template for "<entity> <verb/preposition> <agency>" queries.

    Ex: "projects funded by <agency name>"

    The pattern is a single NN/NNS token, one or more VBD/IN tokens, then
    one or more NP/NNP/NNPS tokens captured as ``thing``.
    """

    thing = Group(Plus(Pos("NP") | Pos("NNP") | Pos("NNPS")), "thing")
    entity = Group(Pos("NN") | Pos("NNS"), "entity")
    verb = Group(Plus(Pos("VBD") | Pos("IN")), "verb")

    regex = entity + verb + thing

    def interpret(self, match):
        """
        Return the title expression for the matched agency and "enum".

        Only the ``thing`` group is used; the ``entity`` and ``verb`` groups
        are matched but not read here.
        """
        # Parenthesized so it parses on Python 3 as well; with a single
        # string argument the output is unchanged on Python 2.
        print("checked")

        agency = AgencyName(match.thing.tokens)
        agency_application = AgencyAppId(agency)
        project = ProjAppId(agency_application)
        return TitleOf(project), "enum"
class InstructorOfficeHour(QuestionTemplate):
    """
    Template for questions about an instructor's office hours.

    Ex: "What time does the cmpe 273 instructor have office hours?"
        "When does the cmpe 273 instructor have office hours?"
    """

    regex = (
        Plus(Lemmas("what time") | Lemma("when"))
        + Lemma("do")
        + Question(Pos("DT"))
        + Course()
        + Lemma("instructor")
        + Lemma("have")
        + Lemmas("office hours")
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return the office-hour expression for the matched course."""
        answer_template = "The instructor for %s is available from %s"
        return (
            IsInstructorInfoRelated()
            + match.course
            + HasFields('office_hour'.decode('utf-8'))
            + HasAnswer(answer_template.decode('utf-8'))
        )
class PersonThatTookPartInConflictNationality(QuestionTemplate):
    """
    Template: people who took part in a military conflict, filtered by the
    country they are from / were born in.
    """

    regex = (
        Question(Lemma("list"))
        + (Lemma("person") | Lemma("persons") | Lemma("people"))
        + (Pos("WP") | Pos("WDT"))
        + (
            (Lemma("was") + Lemma("involved"))
            | (Lemma("took") + Lemma("part"))
            | Lemma("fight")
            | Lemma("fought")
        )
        + Pos("IN")
        + MilitaryConflict()
        + Plus(Pos("WP") | Pos("WDT") | Pos("WRB") | Lemma("be"))
        # "+" binds tighter than "|": either "from", or "bear" plus an IN.
        + (Lemma("from") | (Lemma("bear") + Pos("IN")))
        + Country()
    )

    def interpret(self, match):
        """
        Return the person expression and a ReturnValue built from the two
        trailing values unpacked from the military-conflict match.
        """
        conflict, start, end = match.militaryconflict
        # Only the first element of the country match is needed.
        birth_country, _, _ = match.country
        people = (
            IsPerson()
            + PartOfBattle(conflict)
            + HasBirthPlace(birth_country)
        )
        return people, ReturnValue(start, end)
class PersonThatTookPartInConflictBornAfter(QuestionTemplate):
    """
    Template: people who took part in a military conflict and were born
    after a given year.
    """

    regex = (
        Question(Lemma("list"))
        + (Lemma("person") | Lemma("persons") | Lemma("people"))
        + (Pos("WP") | Pos("WDT"))
        + (
            (Lemma("was") + Lemma("involved"))
            | (Lemma("took") + Lemma("part"))
            | Lemma("fight")
            | Lemma("fought")
        )
        + Pos("IN")
        + MilitaryConflict()
        + Plus(Pos("WP") | Pos("WDT") | Pos("WRB") | Lemma("be"))
        + Lemma("bear")
        + Lemma("after")
        + BornAfterYear()
    )

    def interpret(self, match):
        """
        Return the conflict/commander expression and a ReturnValue built from
        the two trailing values unpacked from the military-conflict match.
        """
        conflict, start, end = match.militaryconflict
        # Only the first element of the born-after-year match is needed.
        born_after, _, _ = match.bornafteryear
        personnel = IsMilitaryPersonnelInvolved(
            IsPerson() + born_after + HasId()
        )
        return conflict + IsCommander(personnel), ReturnValue(start, end)
class WhatTimeIs(RegexTemplate):
    """
    Template for questions about the current time in a place.

    Ex: "What time is in Cordoba"
    """

    nouns = Plus(
        Pos("NN")
        | Pos("NNS")
        | Pos("NNP")
        | Pos("NNPS")
    )
    place = Group(nouns, "place")
    # Either the full "what time is" opening or just "time".
    openings = (
        (Lemma("what") + Lemma("time") + Token("is"))
        | Lemma("time")
    )
    regex = openings + Pos("IN") + place + Question(Pos("."))

    def semantics(self, match):
        """Return the UTC-offset expression for the place and "time"."""
        titled_place = match.place.lemmas.title()
        location = HasKeyword(titled_place) + IsPlace()
        return UTCof(location), "time"
class WhereIsQuestion(QuestionTemplate):
    """
    Template for questions about where something is.

    Ex: "where in the world is the Eiffel Tower"
    """

    thing = Group(
        Plus(Pos("IN") | Pos("NP") | Pos("NNP") | Pos("NNPS")),
        "thing",
    )

    regex = (
        Lemma("where")
        + Question(Lemmas("in the world"))
        + Lemma("be")
        + Question(Pos("DT"))
        + thing
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return the label of the thing's location and "enum"."""
        subject = HasKeyword(match.thing.tokens)
        return LabelOf(LocationOf(subject)), "enum"
class ExamQuestion(QuestionTemplate):
    """
    Template for questions about when a course exam takes place.

    Ex: "When is the final exam for cmpe 273?"
        "When is the midterm exam for cmpe 273?"
        "What time is the final exam for cmpe 273?"
        "What time is the midterm exam for cmpe 273?"
    """

    opening = Lemmas("what time be") | Lemmas("when be")
    exam = Group(
        Plus(Lemmas("final exam") | Lemmas("midterm exam")),
        "exam",
    )
    regex = (
        opening
        + Question(Pos("DT"))
        + exam
        + Pos("IN")
        + Course()
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return the exam-time expression for the matched exam and course."""
        # The matched exam name is substituted now; the two remaining "%s"
        # placeholders are left in the answer template.
        answer_template = ("The %s" % match.exam.tokens) + " for %s is on %s"
        return (
            IsExamRelated()
            + match.course
            + HasFields(match.exam.tokens)
            + HasAnswer(answer_template.decode('utf-8'))
        )
class ListRestaurantInCityQuestion(QuestionTemplate):
    """
    Template for questions listing the restaurants in a city.

    Ex: "list all restaurants in New York City?"
        "Restaurants in London?"
    """

    city = Group(
        Plus(Pos("NN") | Pos("NNS") | Pos("NNP") | Pos("NNPS")),
        "city",
    )

    regex = (
        Question(Lemma("list"))
        + Question(Lemma("all"))
        + (Lemma("restaurant") | Lemma("restaurants"))
        + Pos("IN")
        + city
        + Question(Pos("."))
    )

    def interpret(self, match):
        """Return the names of restaurants in the matched city and "enum"."""
        city_keyword = HasKeyword(match.city.tokens)
        in_city = IsRestaurant() + CityOf(city_keyword)
        return NameOf(in_city), "enum"
class PersonThatTookPartInConflictNationalityBornBefore(QuestionTemplate):
    """
    Template: people who took part in a military conflict, filtered by
    nationality and by being born before a given year.
    """

    regex = (
        Question(Lemma("list"))
        + (Lemma("person") | Lemma("persons") | Lemma("people"))
        + (Pos("WP") | Pos("WDT"))
        + (
            (Lemma("was") + Lemma("involved"))
            | (Lemma("took") + Lemma("part"))
            | Lemma("fight")
            | Lemma("fought")
        )
        + Pos("IN")
        + MilitaryConflict()
        + Plus(Pos("WP") | Pos("WDT") | Pos("WRB") | Lemma("be"))
        # "+" binds tighter than "|": either "from", or "bear" plus an IN.
        + (Lemma("from") | (Lemma("bear") + Pos("IN")))
        + Nationality()
        + Lemma("before")
        + BornBeforeYear()
    )

    def interpret(self, match):
        """
        Return the conflict/personnel expression and a ReturnValue built from
        the two trailing values unpacked from the military-conflict match.
        """
        conflict, start, end = match.militaryconflict
        # Only the first element of each remaining particle match is needed.
        nationality, _, _ = match.nationality
        born_before, _, _ = match.bornbeforeyear

        personnel = IsMilitaryPersonnelInvolved(
            IsPerson() + born_before + nationality + HasId()
        )
        # The conflict used to be added twice ("conflict + conflict + ...");
        # it is added once here.
        # NOTE(review): assumes the repeated operand was a copy-paste slip
        # and that "+" gains nothing from the repeat -- confirm.
        return conflict + personnel, ReturnValue(start, end)
class Author(Particle):
    """
    Particle for an author name: a run of ``nouns`` items or "." lemmas.

    ``nouns`` is defined outside this class.
    """

    regex = Plus(
        nouns
        | Lemma(".")
    )

    def interpret(self, match):
        """Return an IsPerson expression keyed on the matched tokens."""
        author_name = match.words.tokens
        return IsPerson() + HasKeyword(author_name)
class Book(Particle):
    """
    Particle for a book title: one or more ``nouns`` items.

    ``nouns`` is defined outside this class.
    """

    regex = Plus(nouns)

    def interpret(self, match):
        """Return an IsBook expression keyed on the matched tokens."""
        title_tokens = match.words.tokens
        return IsBook() + HasKeyword(title_tokens)
class Band(Particle):
    """Particle for a band: optional DT, then NN / NNP / CD tokens."""

    regex = (
        Question(Pos("DT"))
        + Plus(Pos("NN") | Pos("NNP") | Pos("CD"))
    )

    def interpret(self, match):
        """Return an IsBand expression keyed on the title-cased tokens."""
        titled_name = match.words.tokens.title()
        return IsBand() + HasKeyword(titled_name)
class BandName(Particle):
    """Particle for a band name: optional DT, then NN / NNP tokens."""

    regex = (
        Question(Pos("DT"))
        + Plus(Pos("NN") | Pos("NNP"))
    )

    def interpret(self, match):
        """Return a fixed-data label relation for the title-cased tokens."""
        titled_name = match.words.tokens.title()
        return LabelOfFixedDataRelation(titled_name)
def list_of(things):
    """
    Build a pattern for a list of ``things``: one run of matching items,
    followed by any number of further runs, each preceded by an optional
    "," and an optional "and"/"or".

    NOTE(review): R, P, L and Qu are defined elsewhere in the file --
    presumably short aliases for the regex DSL; confirm.
    """
    item_run = Plus(R(lowercase_is_in(things)))
    separator = Qu(P(',')) + Qu(L('and') | L('or'))
    return item_run + Star(separator + item_run)