def find_hearst_concepts(self, triples):
    """Build concepts and relations from (term1, relation, term2) triples."""
    s_concepts = []
    m_concepts = []
    for (t1, rel, t2) in triples:
        term1 = Term(preprocessor.pos_tag(t1, True))
        term2 = Term(preprocessor.pos_tag(t2, True))
        synsets1 = wn.synsets(term1.get_head()[0], self.pos_tag(term1.get_head()[1]))
        synsets2 = wn.synsets(term2.get_head()[0], self.pos_tag(term2.get_head()[1]))
        if not synsets1:
            raise Exception("'{}' not found in WordNet".format(term1.get_head()[0]))
        if not synsets2:
            raise Exception("'{}' not found in WordNet".format(term2.get_head()[0]))
        # Pick the most similar pair of senses for the two head terms.
        (best1, best2) = self.comp(synsets1, synsets2)
        con1 = self.get_concept(
            concept.Concept(synset=best1, term=term1.get_head()[0]))
        con2 = self.get_concept(
            concept.Concept(synset=best2, term=term2.get_head()[0]))
        conChild1 = None
        conChild2 = None
        # Multiword terms become child concepts (hyponyms) of their head-term concept.
        if len(term1.get_terms()) > 1:
            conChild1 = self.get_concept(
                concept.Concept(name=term1.get_terms(), term=term1.get_head()[0]))
            con1.add_hyponym(conChild1)
            conChild1.add_hypernym(con1)
            #m_concepts.append(conChild1)
        if len(term2.get_terms()) > 1:
            conChild2 = self.get_concept(
                concept.Concept(name=term2.get_terms(), term=term2.get_head()[0]))
            con2.add_hyponym(conChild2)
            conChild2.add_hypernym(con2)
            #m_concepts.append(conChild2)
        # Attach the relation to the most specific concept available on each side.
        if conChild1:
            if conChild2:
                conChild1.add_relation(conChild2, rel)
            else:
                conChild1.add_relation(con2, rel)
            m_concepts.append(conChild1)
        else:
            if conChild2:
                con1.add_relation(conChild2, rel)
                m_concepts.append(conChild2)
            else:
                con1.add_relation(con2, rel)
        s_concepts.append(con1)
        s_concepts.append(con2)
    self.single_concepts = self.single_concepts.union(set(s_concepts))
    self.multi_concepts = self.multi_concepts.union(set(m_concepts))
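# The lookup above goes through NLTK's WordNet interface: wn.synsets() takes a
# lemma plus an optional POS constant, which is presumably what self.pos_tag
# derives from the tagger output. A minimal sketch of that kind of mapping and
# lookup; penn_to_wordnet here is a hypothetical stand-in, not the project's
# pos_tag.
from nltk.corpus import wordnet as wn

def penn_to_wordnet(tag):
    # Map a Penn Treebank tag prefix to an NLTK WordNet POS constant.
    if tag.startswith('N'):
        return wn.NOUN
    if tag.startswith('V'):
        return wn.VERB
    if tag.startswith('J'):
        return wn.ADJ
    if tag.startswith('R'):
        return wn.ADV
    return None

synsets = wn.synsets('line', penn_to_wordnet('NN'))   # all noun senses of "line"
print(synsets[0].name(), synsets[0].definition())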
def load(book):
    """Scrape definitions, postulates, common notions, and propositions for one
    book of Euclid's Elements from D. Joyce's site."""
    Definitions = []
    Propositions = []
    suf = bookToRoman(book)
    base_url = "http://aleph0.clarku.edu/~djoyce/java/elements/"
    url = base_url + "book" + suf + "/" + "book" + suf + ".html"
    page = requests.get(url)
    soup = BeautifulSoup(page.text)
    for conceptType in soup.find_all('dl'):  #[0].find_all('dd'):
        prev = conceptType.previous_element.previous_element
        if prev == 'Definitions' or prev == "Common Notions" or prev == "Postulates":
            i = 1
            for dd in conceptType.find_all('dd'):
                Definitions.append(
                    concept.Concept(
                        prev[0:3], book, i,
                        dd.get_text().replace('\r', '').replace('\n', ''),
                        "", [], []))
                i += 1
        if prev == 'Propositions':
            # Count the propositions listed for this book, then fetch each
            # proposition page individually.
            count = 0
            for a in conceptType.find_all('b'):
                if "Proposition" in str(a):
                    count += 1
            for i in range(1, count + 1):
                url = base_url + "book" + suf + "/prop" + suf + str(i) + ".html"
                page = requests.get(url)
                soup = BeautifulSoup(page.text)
                print(url)
                statement = soup.find_all('div', class_='statement')[0].get_text()
                text = ""
                fromLink = []
                for p in soup.find('div', class_='theorem').find_all('p'):
                    text += p.get_text()
                for j in soup.find('div', class_='theorem').find_all('div', class_='just'):
                    for l in justificationToLink(j.get_text()):
                        fromLink.append(l)
                Propositions.append(
                    concept.Concept(
                        "Pro", book, i,
                        statement.replace('\r', '').replace('\n', ''),
                        text.replace('\r', '').replace('\n', ''),
                        [], fromLink))
    return [Definitions, Propositions]
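# Offline sketch of the scraping pattern used in load(): definitions on the
# Elements pages sit as <dd> entries inside a <dl>. The HTML fragment below is
# illustrative only and not the actual page markup.
from bs4 import BeautifulSoup

fragment = """
<h2>Definitions</h2>
<dl>
  <dd>A point is that which has no part.</dd>
  <dd>A line is breadthless length.</dd>
</dl>
"""
soup = BeautifulSoup(fragment, 'html.parser')
for i, dd in enumerate(soup.find('dl').find_all('dd'), start=1):
    print(i, dd.get_text().strip())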
def form(self, terms):
    #actual formation
    sets = []
    multiwords = []
    # Look for multiword terms, form concepts for them, and then continue the
    # disambiguation with the head term only.
    for term in terms:
        if len(term.get_terms()) > 1:
            con = self.get_concept(
                concept.Concept(name=term.get_terms(), term=term.get_head()[0]))
            multiwords.append(con)
        synsets = self.lookUp(term.get_head()[0], term.get_head()[1])
        if synsets:
            sets.append(synsets)
    if sets:
        easies = self.get_easies(sets)
        rest = [x for x in sets if x not in easies]
        concepts = self.compare_easies(easies)
        concepts = self.compare_concepts(concepts, rest)
        # Link each multiword concept to the concept of its head term.
        for (mult, con) in itertools.product(multiwords, concepts):
            if str(con.get_term()) == str(mult.get_term()):
                mult.add_hypernym(con)
                con.add_hyponym(mult)
        self.single_concepts = set(concepts)
        self.multi_concepts = set(multiwords)
def compare_concepts(self, concepts, rest):
    # Compares already found concepts to the possible synsets of each remaining
    # term in order to find the most probable sense.
    conNoTerm = [x[0] for x in concepts]
    restNoTerm = [y[0] for y in rest]
    for synsets in restNoTerm:
        similarities = []
        for possib in synsets:
            similarity = 0
            for con in conNoTerm:
                # path_similarity may return None when no path exists; count that as 0.
                similarity = similarity + (possib.path_similarity(con) or 0)
            similarities.append(similarity)
        concepts.append((synsets[similarities.index(max(similarities))],
                         rest[restNoTerm.index(synsets)][1]))
    result = []
    for conc in concepts:
        c = self.get_concept(
            concept.Concept(synset=conc[0], term=conc[1], name=conc[1]))
        result.append(c)
    return result
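# Minimal sketch of the sense ranking performed in compare_concepts(): each
# candidate synset of an ambiguous term is scored by its summed path similarity
# to the already-chosen synsets, and the highest-scoring sense wins. The words
# used here are illustrative only.
from nltk.corpus import wordnet as wn

chosen = [wn.synset('triangle.n.01'), wn.synset('circle.n.01')]  # already disambiguated
candidates = wn.synsets('figure', wn.NOUN)                       # still ambiguous

def score(sense):
    # path_similarity may return None when the synsets share no path.
    return sum(sense.path_similarity(s) or 0 for s in chosen)

best = max(candidates, key=score)
print(best.name(), best.definition())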
def concept_from_formula(fmla):
    vs = sorted(list(ilu.used_variables_ast(fmla)), key=str)
    name = (','.join(str(v) + ':' + str(v.sort) for v in vs) + '.' + str(fmla))
    return co.Concept(name, vs, fmla)
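# Illustration of the concept-name format built above: the variables, sorted by
# name, rendered as "name:sort" pairs, then a dot, then the formula text. The
# Var class below is a stand-in for the real AST variable type, used only to
# show the resulting string.
class Var(object):
    def __init__(self, name, sort):
        self.name, self.sort = name, sort
    def __str__(self):
        return self.name

vs = sorted([Var('Y', 'node'), Var('X', 'node')], key=str)
fmla = 'edge(X,Y)'
name = ','.join(str(v) + ':' + str(v.sort) for v in vs) + '.' + str(fmla)
print(name)   # X:node,Y:node.edge(X,Y)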