def __init__(self):
  """Load the knowledge base, phrase table, and category member database.

  All files are read from beneath `wikidir`, which is defined outside this
  method. The time spent loading the commons store and the phrase table is
  printed to standard output.
  """
  # Initialize commons store with knowledge base.
  start = time.time()
  self.commons = sling.Store()
  self.commons.lockgc()
  self.commons.load(wikidir + "/kb.sling", snapshot=True)
  self.n_item_member = self.commons['/w/item/member']
  self.n_instance_of = self.commons['P31']
  self.n_wikimedia_category = self.commons['Q4167836']
  self.n_subject = self.commons['subject']
  self.extractor = sling.FactExtractor(self.commons)

  # Add category subject types.
  # items() is used instead of the Python 2-only iteritems() so the same
  # code runs under both Python 2 and Python 3.
  self.subjects = {
    subject: self.commons[item]
    for subject, item in english_subject_types.items()
  }

  # Add properties for subjects.
  self.subject_properties = [self.commons[p] for p in subject_properties]

  self.commons.freeze()
  end = time.time()
  # A single pre-formatted string prints identically whether `print` is the
  # Python 2 statement or the Python 3 function.
  print("%s secs loading commons" % (end - start))

  # Load phrase table.
  # TODO(ringgaard): Load language-dependent phrase table.
  start = time.time()
  self.phrasetab = sling.PhraseTable(self.commons,
                                     wikidir + "/en/phrase-table.repo")
  end = time.time()
  print("%s secs loading phrase table" % (end - start))

  # Open category member database.
  self.member_db = sling.RecordDatabase(wikidir + "/wikipedia-members.rec")
def init(self, task):
  """Set up the knowledge base, fact extractor, and fact matcher for `task`.

  The knowledge base comes from load_kb(task); the extractor is built on top
  of it, and the matcher is built from both.
  """
  kb = load_kb(task)
  self.kb = kb

  extractor = sling.FactExtractor(kb)
  self.extractor = extractor

  self.matcher = FactMatcher(kb, extractor)
# Look up the knowledge base entries referenced by this script. `kb` is
# defined before this excerpt.
# NOTE(review): the "P…" ids are presumably Wikidata property ids matching
# the variable names -- confirm against the knowledge base.
n_lei = kb["P1278"]
n_swift_bic_code = kb["P2627"]
n_subsidiary = kb["P355"]
n_parent = kb["P749"]
n_owned_by = kb["P127"]
n_owner_of = kb["P1830"]
n_starttime = kb["P580"]
n_endtime = kb["P582"]
n_legal_form = kb["P1454"]
n_coord_location = kb["P625"]
n_geo = kb["/w/geo"]
n_lat = kb["/w/lat"]
n_lng = kb["/w/lng"]

# Phrase table and fact extractor over the same knowledge base.
aliases = sling.PhraseTable(kb, "data/e/kb/en/phrase-table.repo")
factex = sling.FactExtractor(kb)

# Taxonomy built from the two types listed below.
city_types = factex.taxonomy([
  "Q486972", # human settlement
  "Q56061", # administrative territorial entity
])

# Read registers.
bizregs = sling.dataset.bizreg.BusinessRegistries(kb)
regauth = bizregs.by_auth_code()

# Build country and region table.
countries = {}
regions = {}
# NOTE(review): n_country_code is not defined in this excerpt (presumably
# earlier in the file), and the loop body continues past the visible source.
for item in kb:
  code = item[n_country_code]
def increment_key(dictionary, key, delta=1):
  """Add `delta` to dictionary[key], treating a missing key as zero."""
  current = dictionary.get(key, 0)
  dictionary[key] = current + delta


def fact_to_text(fact):
  """Return the `name` of each element of `fact`, joined with ": "."""
  return ": ".join([str(element.name) for element in fact])


# Load the knowledge base into a store, then build the fact extractor and
# phrase table on top of it before freezing the store.
commons = sling.Store()
commons.lockgc()
commons.load(wikidir + "/kb.sling", snapshot=True)
n_is = commons["is"]
extractor = sling.FactExtractor(commons)
phrasetab = sling.PhraseTable(commons, wikidir + "/en/phrase-table.repo")
commons.freeze()


class Name:
  """Working state kept for one document."""

  def __init__(self, doc):
    """Initialize the per-token and per-document bookkeeping for `doc`."""
    self.doc = doc
    self.store = doc.frame.store()

    # One flag per token, all initially unset.
    self.covered = [False] * len(doc.tokens)
    self.evokes = {}
    self.matched = set()

    # One flag per token: True when the token's word is in stop_words.
    self.skip = [token.word in stop_words for token in self.doc.tokens]
def index(self, i, j): return i * self.size + j - 1 # second index is 1-based def assign(self, i, j, span): self.elements[self.index(i, j)] = span def get(self, i, j): return self.elements[self.index(i, j)] commons = sling.Store() commons.lockgc() commons.load("data/e/wiki/kb.sling") phrasetab = sling.PhraseTable(commons, "data/e/wiki/en/phrase-table.repo") docschema = sling.DocumentSchema(commons) factex = sling.FactExtractor(commons) taxonomy = factex.taxonomy() titles = [ commons['Q4164871'], # position commons['Q12737077'], # occupation commons['Q216353'], # title ] commons.freeze() documentids = [ #'Q5945076', 'Q23883660', 'Q43287478', 'Q2147524', #'Q25048736', 'Q6525874', 'Q3851366', 'Q308735', 'Q2184354', 'Q5337174', 'Q6218080', 'Q1606412', 'Q7264446',