Ejemplo n.º 1
0
 def recoursiveFind(self, sentenceDoc, subject, verb, root):
     """Emit a triple for each prepositional phrase below the search root.

     ``findVerbModifier(sentenceDoc)`` is consulted first. When it returns
     a truthy token, that token replaces ``root`` as the node whose
     children are scanned, and its text/POS are used to build the
     predicate. Otherwise the predicate is built from each matching
     preposition itself.

     For every ``prep`` child of the search root, the first ``pobj``
     grandchild is taken as the object. The triple is printed and appended
     to ``self.file`` as ``"subject; predicate; object\\n"``.

     Args:
         sentenceDoc: Parsed sentence, passed to ``findVerbModifier``.
         subject: Object exposing ``.uri``; first element of the triple.
         verb: Not used by this method; kept so existing callers still
             work.
         root: Token whose ``children`` are scanned when no verb modifier
             is found.
     """
     adv = findVerbModifier(sentenceDoc)
     use_modifier = bool(adv)
     if use_modifier:
         root = adv

     for child in root.children:
         if child.dep_ != "prep":
             continue
         for proj in child.children:
             if proj.dep_ != "pobj":
                 continue

             obj = self.nounArray.findWord(proj.orth_)
             if obj is None:
                 # The lookup found nothing: build a word for the object
                 # on the fly instead of failing on ``None.uri`` below.
                 obj = datastructure.Word(proj.orth_)
                 obj.addType(proj.pos_)
                 obj.addUri(wordUri.findUri(obj))

             # The predicate comes from the modifier when there is one,
             # otherwise from the preposition that introduces the object.
             source = adv if use_modifier else child
             predicate = datastructure.Word(source.orth_)
             predicate.addType(source.pos_)
             predicate.addUri(wordUri.findUri(predicate))

             print(subject.uri, "- " + predicate.uri + " -", obj.uri)
             self.file.write(subject.uri + "; " + predicate.uri +
                             "; " + obj.uri + "\n")
             # Only the first object of each preposition is reported.
             break
Ejemplo n.º 2
0
    def findThird(self, sentenceDoc, subject, verb, children, flag):
        """Walk ``children`` and report an object for each appos/pobj node.

        For every node whose dependency label is ``appos`` or ``pobj`` the
        matching entry of ``self.nounArray`` (or a freshly built
        ``datastructure.Word`` when the lookup returns ``None``) is printed
        and handed to ``self.writeOtter`` together with the subject and
        verb URIs.

        For every ``prep`` or ``acomp`` node the method recurses into that
        node's children with ``flag`` set to ``True``. When the incoming
        ``flag`` is falsy, ``verb`` is first replaced by a word built from
        that node; the replacement stays in effect for the remaining
        nodes of this loop as well.

        Args:
            sentenceDoc: Passed through unchanged to the recursive call.
            subject: Object exposing ``.uri``.
            verb: Object exposing ``.uri``; used as the predicate.
            children: Iterable of tokens to inspect.
            flag: When falsy, prep/acomp nodes take over as the predicate.
        """
        for node in children:
            relation = node.dep_

            if relation in ("appos", "pobj"):
                target = self.nounArray.findWord(node.orth_)
                if target is None:
                    # Unknown noun: describe it with a new word object.
                    target = datastructure.Word(node.orth_)
                    target.addType(node.pos_)
                    target.addUri(wordUri.findUri(target))
                print(subject.uri, "- " + verb.uri + " -", target.uri)
                self.writeOtter(subject.uri, verb.uri, target.uri)

            if relation in ("prep", "acomp"):
                if not flag:
                    verb = datastructure.Word(node.orth_)
                    verb.addType(node.pos_)
                    verb.addUri(wordUri.findUri(verb))

                self.findThird(sentenceDoc, subject, verb,
                               list(node.children), True)
Ejemplo n.º 3
0
    def findThird(self, sentenceDoc, subject, verb, children):
        """Report an object for every ``attr`` child, else fall back to ROOT.

        Each node in ``children`` labelled ``attr`` is resolved through
        ``self.nounArray.findWord`` (a new ``datastructure.Word`` is built
        when the lookup returns ``None``). The triple is printed and
        written to ``self.file`` as ``"subject; verb; object\\n"``, and
        ``self.recoursiveFind`` is then called with that node as its root.

        When no ``attr`` node is present, the first token of
        ``sentenceDoc`` labelled ``ROOT`` is passed to
        ``self.recoursiveFind`` instead. If the sentence has no such token
        the method returns without doing anything.

        Args:
            sentenceDoc: Iterable of tokens for the sentence.
            subject: Object exposing ``.uri``.
            verb: Object exposing ``.uri``; used as the predicate.
            children: Iterable of tokens to inspect.
        """
        found = False
        for child in children:
            if child.dep_ != "attr":
                continue

            target = self.nounArray.findWord(child.orth_)
            if target is None:
                target = datastructure.Word(child.orth_)
                target.addType(child.pos_)
                # NOTE(review): the URI lookup is keyed on the lemma string
                # here, not on the word object - confirm findUri accepts it.
                target.addUri(wordUri.findUri(child.lemma_))

            print(subject.uri, "- " + verb.uri + " -", target.uri)
            self.file.write(subject.uri + "; " + verb.uri + "; " +
                            target.uri + "\n")
            self.recoursiveFind(sentenceDoc, subject, verb, child)
            found = True

        if found:
            return

        # Without a default, a sentence lacking a ROOT token (e.g. an
        # empty one) would leave the root unbound and crash here.
        verbDoc = next((tok for tok in sentenceDoc if tok.dep_ == "ROOT"),
                       None)
        if verbDoc is None:
            return
        self.recoursiveFind(sentenceDoc, subject, verb, verbDoc)
Ejemplo n.º 4
0
    def findNouns(self):
        """Collect the nouns of ``self.doc`` into a ``WordArray``.

        Three passes feed the array:

        1. Named entities, except those with a purely numeric/temporal
           label. The entity takes the POS tag of the first document token
           whose text is one of the entity's whitespace-separated parts;
           the URI is only looked up when such a token exists.
        2. Noun chunks, categorised ``"UNKNOWN"`` with type ``"NOUN"``,
           added (and printed) when not already in the array.
        3. Single tokens tagged ``NOUN`` or ``PROPN``, added when not
           already in the array.

        The result is then passed through ``purifyNounArray`` and
        ``removeArticles`` before being returned.
        """
        skipped_labels = ('DATE', 'TIME', 'PERCENT', 'CARDINAL', 'MONEY',
                          'QUANTITY', 'ORDINAL')
        nouns = WordArray()

        # Pass 1: named entities.
        for ent in list(self.doc.ents):
            if ent.label_ in skipped_labels:
                continue
            noun = datastructure.Noun(ent.orth_)
            noun.addCategory(ent.label_)
            pieces = ent.orth_.split()
            match = next((tok for tok in self.doc if tok.orth_ in pieces),
                         None)
            if match is not None:
                noun.addType(match.pos_)
                noun.addUri(wordUri.findUri(noun))
            nouns.addWord(noun)

        # Pass 2: noun chunks.
        for chunk in self.doc.noun_chunks:
            noun = datastructure.Noun(chunk.orth_)
            noun.addCategory("UNKNOWN")
            noun.addType("NOUN")
            if noun in nouns:
                continue
            noun.addUri(wordUri.findUri(noun))
            nouns.addWord(noun)
            print(noun)

        # Pass 3: individual noun / proper-noun tokens.
        for token in self.doc:
            if token.pos_ not in ('NOUN', "PROPN"):
                continue
            noun = datastructure.Noun(token.orth_)
            noun.addCategory("UNKNOWN")
            noun.addType(token.pos_)
            noun.addUri(wordUri.findUri(noun))
            if noun not in nouns:
                nouns.addWord(noun)

        return removeArticles(purifyNounArray(nouns), self.doc)
Ejemplo n.º 5
0
    def findVerbs(self):
        """Return a ``WordArray`` with one entry per distinct verb form.

        Every token of ``self.doc`` tagged ``VERB`` whose exact text has
        not been seen before becomes a ``datastructure.Verb`` carrying its
        lemma as root, its POS tag as type and the URI returned by
        ``wordUri.findUri``.
        """
        seen = set()
        verbs = WordArray()

        for token in self.doc:
            if token.pos_ != 'VERB' or token.orth_ in seen:
                continue
            seen.add(token.orth_)

            entry = datastructure.Verb(token.orth_)
            entry.addRoot(token.lemma_)
            entry.addType(token.pos_)
            entry.addUri(wordUri.findUri(entry))
            verbs.addWord(entry)

        return verbs
Ejemplo n.º 6
0
def removeArticles(nounArray, doc):
    """Drop determiner/modifier words from multi-word nouns, in place.

    For each entry of ``nounArray`` that consists of more than one
    whitespace-separated word and whose ``type`` is not ``"PROPN"``, every
    token of ``doc`` is checked: if its text is one of the entry's words
    and its dependency label is ``det``, ``amod`` or ``nummod``, all
    occurrences of that text are removed from the entry. The remaining
    words are re-joined with single spaces.

    The URI of every entry, changed or not, is then refreshed through
    ``wordUri.findUri``.

    Args:
        nounArray: Iterable of entries exposing ``word``, ``type`` and
            ``addUri``.
        doc: Iterable of tokens exposing ``orth_`` and ``dep_``.

    Returns:
        The same ``nounArray`` object that was passed in.
    """
    strippable = ("det", "amod", "nummod")

    for noun in nounArray:
        if len(noun.word.split()) > 1 and noun.type != "PROPN":
            for token in doc:
                # Re-split on every pass: earlier removals shrink the word.
                parts = noun.word.split()
                if token.orth_ in parts and token.dep_ in strippable:
                    noun.word = " ".join(
                        part for part in parts if part != token.orth_)
        noun.addUri(wordUri.findUri(noun))

    return nounArray