Ejemplo n.º 1
0
    def find_hearst_concepts(self, triples):
        """Create and link concepts for a sequence of (term, relation, term) triples.

        Both terms of a triple are POS-tagged and wrapped in ``Term``; the head
        word of each is looked up with ``wn.synsets`` and ``self.comp`` selects
        one synset per head. A concept is obtained for each head and, when a
        term consists of more than one word, an additional concept for the
        full term is linked to its head concept as hyponym/hypernym. The
        relation is attached from the most specific concept of the first term
        to the most specific concept of the second term.

        Args:
            triples: iterable of ``(t1, rel, t2)`` tuples.

        Raises:
            Exception: if the head word of either term has no synsets.

        Side effects:
            ``self.single_concepts`` and ``self.multi_concepts`` are replaced
            by their union with the concepts collected here.
        """

        def link_multiword(term, head_word, parent):
            # Returns the concept for the complete multiword term, linked below
            # ``parent``, or None when the term is a single word.
            if len(term.get_terms()) <= 1:
                return None
            child = self.get_concept(
                concept.Concept(name=term.get_terms(), term=head_word)
            )
            parent.add_hyponym(child)
            child.add_hypernym(parent)
            return child

        head_concepts = []
        multiword_concepts = []

        for first, relation, second in triples:
            first_term = Term(preprocessor.pos_tag(first, True))
            second_term = Term(preprocessor.pos_tag(second, True))

            first_head = first_term.get_head()
            first_synsets = wn.synsets(first_head[0], self.pos_tag(first_head[1]))
            second_head = second_term.get_head()
            second_synsets = wn.synsets(second_head[0], self.pos_tag(second_head[1]))

            # Both lookups run before either is validated; the first term is
            # reported first when both are missing.
            for head, candidates in ((first_head, first_synsets),
                                     (second_head, second_synsets)):
                if not candidates:
                    raise Exception("'{}' not found in WordNet".format(head[0]))

            best_first, best_second = self.comp(first_synsets, second_synsets)

            first_concept = self.get_concept(
                concept.Concept(synset=best_first, term=first_head[0])
            )
            second_concept = self.get_concept(
                concept.Concept(synset=best_second, term=second_head[0])
            )

            first_child = link_multiword(first_term, first_head[0], first_concept)
            second_child = link_multiword(second_term, second_head[0], second_concept)

            # The relation always connects the most specific concept available
            # on each side.
            source = first_child if first_child else first_concept
            target = second_child if second_child else second_concept
            source.add_relation(target, relation)

            # NOTE(review): when both terms are multiword only the first child
            # is collected; the second child is collected only when the first
            # term is a single word -- confirm this asymmetry is intended.
            if first_child:
                multiword_concepts.append(first_child)
            elif second_child:
                multiword_concepts.append(second_child)

            head_concepts.append(first_concept)
            head_concepts.append(second_concept)

        self.single_concepts = self.single_concepts.union(set(head_concepts))
        self.multi_concepts = self.multi_concepts.union(set(multiword_concepts))
Ejemplo n.º 2
0
def load(book):
    """Scrape the definitions and propositions of one book of Euclid's Elements.

    The book's index page is fetched from the site under ``base_url``. Every
    ``<dl>`` on that page is classified by the text found two elements before
    it:

    * ``Definitions``, ``Common Notions`` or ``Postulates``: one
      ``concept.Concept`` per ``<dd>``, tagged with the first three characters
      of the heading and numbered from 1.
    * ``Propositions``: the ``<b>`` tags containing "Proposition" are counted
      and the page of each proposition is fetched; its statement, the text of
      the theorem's paragraphs and the links derived from its justifications
      are stored in a ``concept.Concept`` tagged ``"Pro"``.

    Carriage returns and newlines are stripped from all stored texts.

    Args:
        book: book identifier; converted to the URL suffix by ``bookToRoman``.

    Returns:
        A two-element list ``[definitions, propositions]``.

    Raises:
        IndexError: if a proposition page has no ``div.statement``.
        AttributeError: if a proposition page has no ``div.theorem``.
    """
    definitions = []
    propositions = []
    suf = bookToRoman(book)
    base_url = "http://aleph0.clarku.edu/~djoyce/java/elements/"
    book_url = base_url + "book" + suf + "/"

    index_page = requests.get(book_url + "book" + suf + ".html")
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
    # installed; pin one if results must be reproducible across machines.
    index_soup = BeautifulSoup(index_page.text)

    for section in index_soup.find_all('dl'):
        heading = section.previous_element.previous_element

        if (heading == 'Definitions' or heading == "Common Notions"
                or heading == "Postulates"):
            for number, dd in enumerate(section.find_all('dd'), 1):
                definitions.append(
                    concept.Concept(
                        heading[0:3], book, number,
                        dd.get_text().replace('\r', '').replace('\n', ""), "",
                        [], []))

        if heading == 'Propositions':
            count = sum(1 for bold in section.find_all('b')
                        if "Proposition" in str(bold))

            for number in range(1, count + 1):
                # Distinct names keep the index page's url/soup from being
                # clobbered while its <dl> sections are still being walked.
                prop_url = book_url + "prop" + suf + str(number) + ".html"
                prop_page = requests.get(prop_url)
                prop_soup = BeautifulSoup(prop_page.text)
                # Parenthesised single-argument form prints the same on
                # Python 2 and is valid syntax on Python 3.
                print(prop_url)

                statement = prop_soup.find_all(
                    'div', class_='statement')[0].get_text()
                theorem = prop_soup.find('div', class_='theorem')

                text = "".join(p.get_text() for p in theorem.find_all('p'))

                from_links = []
                for just in theorem.find_all('div', class_='just'):
                    from_links.extend(justificationToLink(just.get_text()))

                propositions.append(
                    concept.Concept(
                        "Pro", book, number,
                        statement.replace('\r', '').replace('\n', ''),
                        text.replace('\r', '').replace('\n', ''), [],
                        from_links))

    return [definitions, propositions]
Ejemplo n.º 3
0
    def form(self, terms):
        """Form concepts for ``terms`` and store them on the instance.

        Multiword terms get a concept of their own; disambiguation then
        continues with the head word of every term only. Head-word lookups
        that return something truthy are passed through ``get_easies``,
        ``compare_easies`` and ``compare_concepts`` to obtain the single-word
        concepts. Each multiword concept is finally linked as a hyponym of
        every single-word concept whose term has the same string form.

        Args:
            terms: iterable of objects offering ``get_terms()`` and
                ``get_head()``.

        Side effects:
            ``self.multi_concepts`` is always replaced. ``self.single_concepts``
            is replaced only when at least one lookup succeeded; otherwise it
            is left untouched.
        """
        lookups = []
        multiword_concepts = []

        for term in terms:
            if len(term.get_terms()) > 1:
                multiword_concepts.append(
                    self.get_concept(
                        concept.Concept(name=term.get_terms(),
                                        term=term.get_head()[0])
                    )
                )
            found = self.lookUp(term.get_head()[0], term.get_head()[1])
            if found:
                lookups.append(found)

        if not lookups:
            # Nothing to disambiguate: only the multiword concepts are stored.
            self.multi_concepts = set(multiword_concepts)
            return

        easies = self.get_easies(lookups)
        remaining = [entry for entry in lookups if entry not in easies]
        single_concepts = self.compare_easies(easies)
        single_concepts = self.compare_concepts(single_concepts, remaining)

        # Attach every multiword concept below the concept of its head term.
        for multi in multiword_concepts:
            for single in single_concepts:
                if str(single.get_term()) == str(multi.get_term()):
                    multi.add_hypernym(single)
                    single.add_hyponym(multi)

        self.single_concepts = set(single_concepts)
        self.multi_concepts = set(multiword_concepts)
Ejemplo n.º 4
0
    def compare_concepts(self, concepts, rest):
        """Pick the most probable synset for each remaining term.

        For every entry of ``rest`` the candidate synset with the highest
        summed ``path_similarity`` to the synsets already present in
        ``concepts`` is chosen (the first one wins on ties).

        Args:
            concepts: list of sequences whose item 0 is an already selected
                synset and item 1 its term. The list is extended in place with
                one ``(synset, term)`` tuple per entry of ``rest``.
            rest: list of sequences whose item 0 is a list of candidate
                synsets and item 1 the term they belong to.

        Returns:
            A list holding the result of ``self.get_concept`` for every entry
            of ``concepts``, including the ones added here.

        Raises:
            ValueError: if an entry of ``rest`` has an empty candidate list.
        """
        # Snapshot the reference synsets first so that picks made below do not
        # influence the scoring of later entries.
        anchors = [pair[0] for pair in concepts]

        # Iterating the entries directly keeps each candidate list paired with
        # its own term, even when two entries have equal candidate lists.
        for entry in rest:
            candidates = entry[0]
            scores = []
            for candidate in candidates:
                total = 0
                for anchor in anchors:
                    score = candidate.path_similarity(anchor)
                    # path_similarity can return None (NLTK does so when no
                    # path connects the two synsets); count that as no
                    # similarity instead of failing on ``int + None``.
                    if score is not None:
                        total += score
                scores.append(total)
            best = candidates[scores.index(max(scores))]
            concepts.append((best, entry[1]))

        return [
            self.get_concept(
                concept.Concept(synset=pair[0], term=pair[1], name=pair[1])
            )
            for pair in concepts
        ]
Ejemplo n.º 5
0
def concept_from_formula(fmla):
    """Wrap a formula in a ``co.Concept`` named after its variables.

    The variables reported by ``ilu.used_variables_ast`` are ordered by their
    string form. The concept name is the comma-separated list of
    ``variable:sort`` pairs, a dot, and the string form of the formula.

    Args:
        fmla: the formula to wrap.

    Returns:
        ``co.Concept(name, variables, fmla)``.
    """
    variables = sorted(ilu.used_variables_ast(fmla), key=str)
    declarations = [':'.join((str(var), str(var.sort))) for var in variables]
    name = ','.join(declarations) + '.' + str(fmla)
    return co.Concept(name, variables, fmla)