Exemple #1
0
 def test_ReduceToParsesWithSameRootAndPos(self):
     """
     Calls reduceToParsesWithSameRootAndPos on the parse2, parse3 and parse6 fixtures and checks the size
     reported by each parse list afterwards.
     """
     # (parse list fixture, root+POS handed to the reduction, expected size afterwards)
     cases = [
         (self.parse2, "kop+VERB", 1),
         (self.parse3, "topla+VERB", 2),
         (self.parse6, "karşıla+VERB", 2),
     ]
     for parseList, rootWithPos, expectedSize in cases:
         parseList.reduceToParsesWithSameRootAndPos(Word(rootWithPos))
         self.assertEqual(expectedSize, parseList.size())
    def __init__(self,
                 fileOrStr=None,
                 languageChecker: LanguageChecker = None):
        """
        Constructor of Sentence class which fills the words list either from an in-memory text stream or from a
        plain string.

        If fileOrStr is an io.StringIO, every whitespace separated token of every line is added as a Word and the
        stream is closed afterwards; languageChecker is not consulted for this kind of input. If fileOrStr is a
        str, it is split on single spaces, empty pieces are skipped, and a piece is added only when no
        languageChecker is given or languageChecker.isValidWord accepts it. For any other input (including None)
        the words list stays empty.

        PARAMETERS
        ----------
        fileOrStr : io.StringIO or str
            Stream or sentence text to read words from.
        languageChecker : LanguageChecker
            Optional checker used to filter the words of a str input.
        """
        self.words = []
        if isinstance(fileOrStr, io.StringIO):
            # The context manager closes the stream even when building a Word fails.
            with fileOrStr:
                for line in fileOrStr:
                    # split() without a separator drops the line terminator and never yields empty tokens, so
                    # no Word ends up holding "\n" or "".
                    for token in line.split():
                        self.words.append(Word(token))
        elif isinstance(fileOrStr, str):
            for token in fileOrStr.split(" "):
                if len(token) > 0 and (languageChecker is None
                                       or languageChecker.isValidWord(token)):
                    self.words.append(Word(token))
    def candidateList(self, word: Word) -> list:
        """
        Builds the list of candidate strings for the given word. The list starts with the name of the word, is
        extended by generateCandidateList (called with the list, the name and index 0) and is then filtered:
        only the candidates for which morphologicalAnalysis returns at least one parse are kept, in their
        original order.

        PARAMETERS
        ----------
        word : Word
            Word to produce candidates for.

        RETURNS
        -------
        list
            Candidates that have at least one morphological analysis.
        """
        generated = [word.getName()]
        self.__generateCandidateList(generated, word.getName(), 0)
        return [candidate for candidate in generated
                if self.fsm.morphologicalAnalysis(candidate).size() != 0]
Exemple #4
0
 def __init__(self, parse: None):
     """
     A constructor of MetamorphicParse class which splits the given parse on "+" and stores the first piece as
     the root word and the remaining pieces as the meta morpheme list. The parse "+" itself is kept as the root
     with an empty meta morpheme list. Nothing is initialized when parse is None.

     PARAMETERS
     ----------
     parse : str
         String to parse (the signature annotation is kept as it was; a str or None is what the body handles).
     """
     if parse is not None:
         self.__metaMorphemeList = []
         if parse == "+":
             self.__root = Word("+")
         else:
             # str.split takes a literal separator, not a regular expression: the former "\\+" looked for a
             # backslash followed by a plus and therefore never split anything.
             words = parse.split("+")
             self.__root = Word(words[0])
             self.__metaMorphemeList.extend(words[1:])
Exemple #5
0
 def test_GetParseWithLongestRootWord(self):
     """
     Compares the root of the parse returned by getParseWithLongestRootWord with the expected word for the
     parse2 to parse6 fixtures.
     """
     # (expected root, parse list fixture)
     expectations = [
         ("kopar", self.parse2),
         ("toplama", self.parse3),
         ("değerlendirme", self.parse4),
         ("soruşturma", self.parse5),
         ("karşılaştırmalı", self.parse6),
     ]
     for expectedRoot, parseList in expectations:
         self.assertEqual(Word(expectedRoot),
                          parseList.getParseWithLongestRootWord().root)
Exemple #6
0
    def train(self, corpus: DisambiguationCorpus):
        """
        The train method creates four new NGram models (word unigram, word bigram, ig unigram, ig bigram) and
        fills them from the sentences of the given corpus. Only words that are DisambiguatedWord instances are
        used as the current word. For such a word, its word-with-pos is added to the word unigram model and its
        transition list (wrapped in a Word) is added to the ig unigram model.

        If the sentence has a next word, the current and the next word-with-pos are added to the word bigram
        model and the two transition lists are added to the ig bigram model. The next word is not type checked
        here.

        At the end, the probabilities of all four models are calculated with calculateNGramProbabilitiesSimple
        using a new LaplaceSmoothing for each model.

        PARAMETERS
        ----------
        corpus : DisambiguationCorpus
            DisambiguationCorpus to train.
        """
        # Buffers of length one and two that are refilled and handed to the models for every n-gram.
        wordUnigram, igUnigram = [None], [None]
        wordBigram, igBigram = [None, None], [None, None]
        self.wordUniGramModel = NGram(1)
        self.wordBiGramModel = NGram(2)
        self.igUniGramModel = NGram(1)
        self.igBiGramModel = NGram(2)
        for sentence in corpus.sentences:
            for position in range(sentence.wordCount()):
                current = sentence.getWord(position)
                if not isinstance(current, DisambiguatedWord):
                    continue
                wordUnigram[0] = current.getParse().getWordWithPos()
                self.wordUniGramModel.addNGram(wordUnigram)
                igUnigram[0] = Word(current.getParse().getTransitionList())
                self.igUniGramModel.addNGram(igUnigram)
                if position + 1 < sentence.wordCount():
                    following = position + 1
                    wordBigram[0] = wordUnigram[0]
                    wordBigram[1] = sentence.getWord(following).getParse().getWordWithPos()
                    self.wordBiGramModel.addNGram(wordBigram)
                    igBigram[0] = igUnigram[0]
                    igBigram[1] = Word(sentence.getWord(following).getParse().getTransitionList())
                    self.igBiGramModel.addNGram(igBigram)
        for model in (self.wordUniGramModel, self.igUniGramModel,
                      self.wordBiGramModel, self.igBiGramModel):
            model.calculateNGramProbabilitiesSimple(LaplaceSmoothing())
 def __init__(self, parse:None):
     """
     Constructor of MorphologicalParse class which accepts either a parse string or a list of inflectional
     group strings. Nothing is initialized when parse is None.

     A string is split on the derivational boundary "^DB+". If the first piece is "++Punc", the root becomes
     "+" and the only inflectional group is "Punc". Otherwise the first piece is cut at its first "+": the text
     before it becomes the root and the text after it becomes the first inflectional group; a first piece
     without "+" is used as the root as a whole. Every following piece becomes one more inflectional group.

     For a list, the first item is cut at its first "+" in the same way; when it has no "+", no root is set
     and the first item is skipped. Every following item becomes one more inflectional group.

     PARAMETERS
     ----------
     parse : str or list
         Parse string or list of inflectional group strings (the signature annotation is kept as it was).
     """
     if parse is not None:
         if isinstance(parse, str):
             iGs = parse.split("^DB+")
             self.inflectionalGroups = []
             if iGs[0] == "++Punc":
                 self.root = Word("+")
                 self.inflectionalGroups.append(InflectionalGroup("Punc"))
             else:
                 # str.index raises ValueError instead of returning -1, so the former "index(...) != -1" test
                 # could never reach the branch for a piece without "+". partition reports a missing
                 # separator through an empty middle element and leaves the whole text in the first element.
                 rootName, plus, firstGroup = iGs[0].partition("+")
                 self.root = Word(rootName)
                 if plus:
                     self.inflectionalGroups.append(InflectionalGroup(firstGroup))
                 for group in iGs[1:]:
                     self.inflectionalGroups.append(InflectionalGroup(group))
         elif isinstance(parse, list):
             self.inflectionalGroups = []
             rootName, plus, firstGroup = parse[0].partition("+")
             if plus:
                 self.root = Word(rootName)
                 self.inflectionalGroups.append(InflectionalGroup(firstGroup))
             for group in parse[1:]:
                 self.inflectionalGroups.append(InflectionalGroup(group))
    def __init__(self, parse=None):
        """
        Constructor of MorphologicalParse class which accepts either a parse string or a list of inflectional
        group strings. Nothing is initialized when parse is None.

        A string is split on the derivational boundary (^DB+) into the iGs list. Then, a new inflectionalGroups
        list is created and the following cases are checked.

        If the first item of iGs list is ++Punc, it creates a new root as +, and adds an InflectionalGroup built
        from Punc to the inflectionalGroups list. No further items are processed in this case.

        If the first item of iGs list has +, it creates a new word from the text before the first + and assigns
        it to root. Then, it adds an InflectionalGroup built from the text after that + to the
        inflectionalGroups list.

        If the first item of iGs list does not contain +, it creates a new word with first item and assigns it
        as root. In both of the last two cases, every following item of iGs is added to the inflectionalGroups
        list as a new InflectionalGroup.

        For a list input, the first item is cut at its first + in the same way; when it has no +, no root is set
        and the first item is skipped. Every following item becomes one more InflectionalGroup.

        PARAMETERS
        ----------
        parse : str or list
            Parse string or list of inflectional group strings.
        """
        if parse is not None:
            if isinstance(parse, str):
                iGs = parse.split("^DB+")
                self.inflectionalGroups = []
                if iGs[0] == "++Punc":
                    self.root = Word("+")
                    self.inflectionalGroups.append(InflectionalGroup("Punc"))
                else:
                    # str.index raises ValueError instead of returning -1, so the former "index(...) != -1"
                    # test could never reach the branch for an item without "+". partition reports a missing
                    # separator through an empty middle element and leaves the whole text in the first one.
                    rootName, plus, firstGroup = iGs[0].partition("+")
                    self.root = Word(rootName)
                    if plus:
                        self.inflectionalGroups.append(
                            InflectionalGroup(firstGroup))
                    for group in iGs[1:]:
                        self.inflectionalGroups.append(
                            InflectionalGroup(group))
            elif isinstance(parse, list):
                self.inflectionalGroups = []
                rootName, plus, firstGroup = parse[0].partition("+")
                if plus:
                    self.root = Word(rootName)
                    self.inflectionalGroups.append(
                        InflectionalGroup(firstGroup))
                for group in parse[1:]:
                    self.inflectionalGroups.append(InflectionalGroup(group))
Exemple #9
0
    def spellCheck(self, sentence: Sentence) -> Sentence:
        """
        The spellCheck method walks over the words of the given sentence and builds a new Sentence. A word for
        which morphologicalAnalysis returns at least one parse is copied as it is. For a word without any parse,
        candidateList is called; if it returns candidates, one of them is picked with randrange and added as a
        new Word, otherwise the original word is added.

        PARAMETERS
        ----------
        sentence : Sentence
            Sentence type input.

        RETURNS
        -------
        Sentence
            Sentence result.
        """
        corrected = Sentence()
        for position in range(sentence.wordCount()):
            current = sentence.getWord(position)
            # The original word is kept unless it has no analysis and at least one candidate exists.
            replacement = current
            if self.fsm.morphologicalAnalysis(current.getName()).size() == 0:
                options = self.candidateList(current)
                if len(options) > 0:
                    replacement = Word(options[randrange(len(options))])
            corrected.addWord(replacement)
        return corrected
Exemple #10
0
    def candidateList(self, word: Word) -> list:
        """
        The candidateList method creates candidates by calling generateCandidateList with the name of the given
        word and then checks each of them with morphologicalAnalysis. A candidate with at least one parse is
        kept. For a candidate without any parse, the dictionary's getCorrectForm is asked for a replacement; the
        replacement takes the candidate's place when it is not the empty string and has at least one parse
        itself, otherwise the candidate is dropped.

        PARAMETERS
        ----------
        word : Word
            Word input.

        RETURNS
        -------
        list
            candidates list.
        """
        accepted = []
        for candidate in self.__generateCandidateList(word.getName()):
            if self.fsm.morphologicalAnalysis(candidate).size() != 0:
                accepted.append(candidate)
                continue
            corrected = self.fsm.getDictionary().getCorrectForm(candidate)
            if corrected != "" and self.fsm.morphologicalAnalysis(
                    corrected).size() > 0:
                accepted.append(corrected)
        return accepted
    def deasciify(self, sentence: Sentence) -> Sentence:
        """
        The deasciify method walks over the words of the given Sentence and builds a new Sentence. A word for
        which morphologicalAnalysis returns at least one parse is copied as it is. For a word without any parse,
        candidateList is called; if it returns candidates, one of them is picked with randrange and added as a
        new Word, otherwise the original word is added.

        PARAMETERS
        ----------
        sentence : Sentence
            Sentence type input.

        RETURNS
        -------
        Sentence
            result Sentence.
        """
        converted = Sentence()
        for position in range(sentence.wordCount()):
            current = sentence.getWord(position)
            hasAnalysis = self.fsm.morphologicalAnalysis(
                current.getName()).size() != 0
            options = [] if hasAnalysis else self.candidateList(current)
            if len(options) > 0:
                chosen = Word(options[randrange(len(options))])
            else:
                # Either the word is already analysable or nothing better was found.
                chosen = current
            converted.addWord(chosen)
        return converted
Exemple #12
0
    def getBestRootWord(self, fsmParseList: FsmParseList) -> Word:
        """
        The getBestRootWord method scores every parse of the given FsmParseList by multiplying the word unigram
        probability of its word-with-pos with the ig unigram probability of its transition list (wrapped in a
        Word). It returns the word-with-pos of the parse with the highest score; on ties the earliest parse wins.

        PARAMETERS
        ----------
        fsmParseList : FsmParseList
            FsmParseList is used to get the part of speech tags and transition lists of words.

        RETURNS
        -------
        Word
            The word with the highest probability, or None when no parse scores above -1 (for example when the
            list is empty).
        """
        topWord = None
        topScore = -1
        for parseIndex in range(fsmParseList.size()):
            candidate = fsmParseList.getFsmParse(parseIndex).getWordWithPos()
            transitions = Word(
                fsmParseList.getFsmParse(parseIndex).getTransitionList())
            score = self.wordUniGramModel.getProbability(candidate) * \
                self.igUniGramModel.getProbability(transitions)
            if score > topScore:
                topWord, topScore = candidate, score
        return topWord
Exemple #13
0
    def getParseWithBestIgProbability(self, parseList: FsmParseList,
                                      correctFsmParses: list,
                                      index: int) -> FsmParse:
        """
        The getParseWithBestIgProbability wraps the transition list of each FsmParse in a Word and asks
        getIgProbability for its probability, passing correctFsmParses and index along unchanged. It returns the
        parse with the highest probability; on ties the earliest parse wins.

        PARAMETERS
        ----------
        parseList : FsmParseList
            FsmParseList is used to get the FsmParse.
        correctFsmParses : list
            Forwarded to getIgProbability.
        index : int
            Forwarded to getIgProbability.

        RETURNS
        -------
        FsmParse
            The parse with the highest probability, or None when no parse scores above -1.
        """
        winner = None
        winnerScore = -1
        for parseIndex in range(parseList.size()):
            transitions = Word(
                parseList.getFsmParse(parseIndex).getTransitionList())
            score = self.getIgProbability(transitions, correctFsmParses, index)
            if score > winnerScore:
                winner, winnerScore = parseList.getFsmParse(parseIndex), score
        return winner
    def getMetaMorphemeTagForParse(self, parse: MorphologicalParse,
                                   tag: str) -> list:
        """
        getMetaMorphemeTagForParse method collects the morphotactic tags whose meta morpheme equals the given
        tag and which the given parse contains. If the first character of the tag is a punctuation symbol, that
        character is removed before the comparison.

        PARAMETERS
        ----------
        parse : MorphologicalParse
            MorphologicalParse type input.
        tag : str
            String to get meta morphemes from. It must not be empty, since its first character is read.

        RETURNS
        -------
        list
            List type result which holds the matching morphotactic tags.
        """
        if Word.isPunctuationSymbol(tag[0]):
            tag = tag[1:]
        return [
            MetamorphicParse.morphotacticTags[position]
            for position in range(len(MetamorphicParse.metaMorphemes))
            if tag == self.metaMorphemes[position]
            and parse.containsTag(MetamorphicParse.morphotacticTags[position])
        ]
Exemple #15
0
    def train(self, corpus: DisambiguationCorpus):
        """
        The train method creates new word and ig unigram and bigram models and fills them from the sentences of
        the given DisambiguationCorpus. For every word that has a next word in its sentence, the word-with-pos of
        the word is added to the word unigram model and the pair (word, next word) to the word bigram model.

        Then, for every inflectional group of the next word, the pair (last inflectional group of the word, that
        inflectional group of the next word) is added to the ig bigram model and the inflectional group of the
        next word is added to the ig unigram model. Inflectional groups are stored as Words built from their
        string forms.

        At the end, the probabilities of all four models are calculated with calculateNGramProbabilitiesSimple
        using a new LaplaceSmoothing for each model.

        PARAMETERS
        ----------
        corpus : DisambiguationCorpus
            DisambiguationCorpus to train.
        """
        # Buffers of length one and two that are refilled and handed to the models for every n-gram.
        wordUnigram, igUnigram = [None], [None]
        wordBigram, igBigram = [None, None], [None, None]
        self.wordUniGramModel = NGram(1)
        self.igUniGramModel = NGram(1)
        self.wordBiGramModel = NGram(2)
        self.igBiGramModel = NGram(2)
        for sentence in corpus.sentences:
            for position in range(sentence.wordCount() - 1):
                current = sentence.getWord(position)
                following = sentence.getWord(position + 1)
                wordBigram[0] = current.getParse().getWordWithPos()
                wordUnigram[0] = wordBigram[0]
                wordBigram[1] = following.getParse().getWordWithPos()
                self.wordUniGramModel.addNGram(wordUnigram)
                self.wordBiGramModel.addNGram(wordBigram)
                for groupIndex in range(following.getParse().size()):
                    igBigram[0] = Word(
                        str(current.getParse().getLastInflectionalGroup()))
                    igBigram[1] = Word(
                        str(following.getParse().getInflectionalGroup(groupIndex)))
                    self.igBiGramModel.addNGram(igBigram)
                    igUnigram[0] = igBigram[1]
                    self.igUniGramModel.addNGram(igUnigram)
        for model in (self.wordUniGramModel, self.igUniGramModel,
                      self.wordBiGramModel, self.igBiGramModel):
            model.calculateNGramProbabilitiesSimple(LaplaceSmoothing())
Exemple #16
0
 def getMetaMorphemeTag(self, tag: str) -> list:
     """
     getMetaMorphemeTag method collects the morphotactic tags whose meta morpheme equals the given tag. If the
     first character of the tag is a punctuation symbol, that character is removed before the comparison.

     PARAMETERS
     ----------
     tag : str
         String to get meta morphemes from. It must not be empty, since its first character is read.

     RETURNS
     -------
     list
         List type result which holds the matching morphotactic tags.
     """
     if Word.isPunctuationSymbol(tag[0]):
         tag = tag[1:]
     return [
         MetamorphicParse.morphotacticTags[position]
         for position in range(len(MetamorphicParse.metaMorphemes))
         if tag == self.metaMorphemes[position]
     ]
 def test_WordAsciify(self):
     """
     Checks the string returned by asciifyWord of the simpleAsciifier fixture for a set of words.
     """
     # (expected result, input word)
     samples = [
         ("cogusiCOGUSI", "çöğüşıÇÖĞÜŞİ"),
         ("sogus", "söğüş"),
         ("uckagitcilik", "üçkağıtçılık"),
         ("akiskanlistiricilik", "akışkanlıştırıcılık"),
         ("citcitcilik", "çıtçıtçılık"),
         ("duskirikligi", "düşkırıklığı"),
         ("yuzgorumlugu", "yüzgörümlüğü"),
     ]
     for expected, original in samples:
         self.assertEqual(expected, self.simpleAsciifier.asciifyWord(Word(original)))
Exemple #18
0
    def spellCheck(self, sentence: Sentence) -> Sentence:
        """
        The spellCheck method walks over the words of the given sentence and builds a new Sentence while
        remembering the root of the previously emitted word.

        A word for which morphologicalAnalysis returns at least one parse is added as it is and the word of its
        parse with the longest root becomes the previous root.

        For a word without any parse, candidateList is called. Each candidate is analysed and the word of its
        parse with the longest root is taken as its root. The probability of a candidate is asked from the
        n-gram model with the previous root's name and the root's name, or with the root's name alone when there
        is no previous root yet. The candidate with the highest probability above 0.0 is added to the result and
        its root becomes the previous root. When no candidate qualifies, the original word is added and becomes
        the previous root itself.

        PARAMETERS
        ----------
        sentence : Sentence
            Sentence type input.

        RETURNS
        -------
        Sentence
            Sentence result.
        """
        lastRoot = None
        corrected = Sentence()
        for position in range(sentence.wordCount()):
            current = sentence.getWord(position)
            analyses = self.fsm.morphologicalAnalysis(current.getName())
            if analyses.size() != 0:
                # Known word: copy it and remember its root as context for the following word.
                corrected.addWord(current)
                lastRoot = analyses.getParseWithLongestRootWord().getWord()
                continue
            options = self.candidateList(current)
            chosenText = current.getName()
            chosenRoot = current
            chosenScore = 0.0
            for option in options:
                optionRoot = self.fsm.morphologicalAnalysis(
                    option).getParseWithLongestRootWord().getWord()
                if lastRoot is None:
                    context = (optionRoot.getName(),)
                else:
                    context = (lastRoot.getName(), optionRoot.getName())
                score = self.__nGram.getProbability(*context)
                if score > chosenScore:
                    chosenText, chosenRoot, chosenScore = option, optionRoot, score
            lastRoot = chosenRoot
            corrected.addWord(Word(chosenText))
        return corrected
    def __init__(self, parse=None):
        """
        A constructor of MetamorphicParse class which splits the given parse on + and stores the first piece as
        the root word and the remaining pieces as the metaMorphemeList. The parse "+" itself is kept as the root
        with an empty metaMorphemeList. Nothing is initialized when parse is None.

        PARAMETERS
        ----------
        parse : str
            String to parse.
        """
        if parse is not None:
            self.__metaMorphemeList = []
            if parse == "+":
                self.__root = Word("+")
            else:
                # str.split takes a literal separator, not a regular expression: the former "\\+" looked for a
                # backslash followed by a plus and therefore never split anything.
                words = parse.split("+")
                self.__root = Word(words[0])
                self.__metaMorphemeList.extend(words[1:])
    def getWordWithPos(self) -> Word:
        """
        The getWordWithPos method joins the name of the root, a + sign and the tag string of the first tag of
        the first inflectional group, and returns the result as a new word.

        RETURNS
        -------
        Word
            Root name and the tag string of the first tag of the first inflectional group, joined with +.
        """
        rootName = self.root.getName()
        firstTag = InflectionalGroup.getTagString(
            self.firstInflectionalGroup().getTag(0))
        return Word(rootName + "+" + firstTag)
Exemple #21
0
    def __init__(self, surfaceForm: str):
        """
        Binary attribute for a given word. The value handed to the parent constructor is the result of
        Word.isHonorific for the surface form (according to the original documentation, true for words such as
        "bay" or "bayan").

        PARAMETERS
        ----------
        surfaceForm : str
            Surface form of the word.
        """
        honorific = Word.isHonorific(surfaceForm)
        super().__init__(honorific)
    def __init__(self, surfaceForm: str):
        """
        Binary attribute for a given word. The value handed to the parent constructor is the result of
        Word.isMoney for the surface form (according to the original documentation, true for words such as
        "dolar", "euro" or "sterlin").

        PARAMETERS
        ----------
        surfaceForm : str
            Surface form of the word.
        """
        money = Word.isMoney(surfaceForm)
        super().__init__(money)
    def __init__(self, surfaceForm: str):
        """
        Binary attribute for a given word. The value handed to the parent constructor is the result of
        Word.isTime for the surface form (according to the original documentation, true when the word represents
        a time form).

        PARAMETERS
        ----------
        surfaceForm : str
            Surface form of the word.
        """
        timeForm = Word.isTime(surfaceForm)
        super().__init__(timeForm)
Exemple #24
0
    def __init__(self, surfaceForm: str):
        """
        Binary attribute for a given word. The value handed to the parent constructor is the result of
        Word.isOrganization for the surface form (according to the original documentation, true for words such
        as "corp.", "inc." or "co.").

        PARAMETERS
        ----------
        surfaceForm : str
            Surface form of the word.
        """
        organization = Word.isOrganization(surfaceForm)
        super().__init__(organization)
Exemple #25
0
 def getMetaMorphemeTagForParse(self, parse: MorphologicalParse,
                                tag: str) -> list:
     """
     getMetaMorphemeTagForParse method collects the morphotactic tags whose meta morpheme equals the given tag
     and which the given parse contains. If Word.isPunctuation accepts the first character of the tag, that
     character is removed before the comparison.

     PARAMETERS
     ----------
     parse : MorphologicalParse
         MorphologicalParse type input.
     tag : str
         String to get meta morphemes from. It must not be empty, since its first character is read.

     RETURNS
     -------
     list
         List type result which holds the matching morphotactic tags.
     """
     if Word.isPunctuation(tag[0]):
         tag = tag[1:]
     return [
         MetamorphicParse.morphotacticTags[position]
         for position in range(len(MetamorphicParse.metaMorphemes))
         if tag == self.metaMorphemes[position]
         and parse.containsTag(MetamorphicParse.morphotacticTags[position])
     ]
Exemple #26
0
 def test_GetCount(self):
     """
     Checks the value returned by getCount for a few words of the corpus and simpleCorpus fixtures.
     """
     # (expected count, corpus fixture, word)
     expectations = [
         (309, self.corpus, "mustafa"),
         (109, self.corpus, "kemal"),
         (122, self.corpus, "atatürk"),
         (4, self.simpleCorpus, "ali"),
         (3, self.simpleCorpus, "gitti"),
         (4, self.simpleCorpus, "at"),
     ]
     for expected, source, name in expectations:
         self.assertEqual(expected, source.getCount(Word(name)))
Exemple #27
0
    def isChunkLabel(self) -> bool:
        """
        Checks if this symbol can be a chunk label or not. The symbol qualifies when its name, cut at the first
        dash (so that "NP-SBJ" is looked up as "NP"), is one of the sentence labels or phrase labels, or when
        its name is a punctuation symbol.

        RETURNS
        -------
        bool
            True if this symbol can be a chunk label, false otherwise.
        """
        # str.replace treats "-.*" as literal text, so the former replace("-.*", "") never removed a suffix.
        # Cutting at the first dash gives what that pattern describes: everything from the dash on is dropped.
        label = self.name.partition("-")[0]
        # The label lookups are done first; the checks are independent, so the result does not depend on
        # their order.
        if label in self.sentenceLabels or label in self.phraseLabels or \
                Word.isPunctuationSymbol(self.name):
            return True
        return False
 def setUp(self) -> None:
     """
     Creates the sentence fixture and adds six words to it, in order.
     """
     self.sentence = Sentence()
     for name in ("ali", "topu", "at", "mehmet", "ayşeyle", "gitti"):
         self.sentence.addWord(Word(name))
Exemple #29
0
    def contains(self, word: str) -> bool:
        """
        The contains method wraps the given String in a Word and checks whether wordList has it.

        PARAMETERS
        ----------
        word : str
            String input to check.

        RETURNS
        -------
        bool
            True if wordList has the given word, False otherwise.
        """
        wanted = Word(word)
        return wanted in self.wordList
Exemple #30
0
    def wordCount(self, excludeStopWords: bool) -> int:
        """
        Recursive function to count the number of words in the subtree rooted at this node. A node without
        children counts as one word. When excludeStopWords is true, such a node is not counted if its lowercased
        name contains "*", is one of the listed stop words, or if its name is a punctuation symbol.

        PARAMETERS
        ----------
        excludeStopWords : bool
            If true, stop words are not counted.

        RETURNS
        -------
        int
            Number of words in the subtree rooted at this node.
        """
        if len(self.children) == 0:
            if not excludeStopWords:
                total = 1
            else:
                name = self.data.getName()
                lowered = name.lower()
                # One lookup replaces the former chain of equality tests, which compared several words
                # twice. The three checks are independent, so the local ones are done first.
                if "*" in lowered or lowered in {
                        "at", "the", "to", "a", "an", "not", "is", "was", "were", "have", "had", "has", "by",
                        "'re", "on", "off", "'s", "n't", "can", "could", "may", "might", "will", "would", "as",
                        "with", "for", "in", "than", "$", "and", "or", "of", "are", "be", "been", "do", "few",
                        "there", "up", "down"} or Word.isPunctuationSymbol(name):
                    total = 0
                else:
                    total = 1
        else:
            total = 0
        for child in self.children:
            total += child.wordCount(excludeStopWords)
        return total