Esempio n. 1
0
 def generateInstanceFromSentence(self, sentence: Sentence, wordIndex: int) -> Instance:
     word = sentence.getWord(wordIndex)
     if isinstance(word, AnnotatedWord):
         classLabel = NamedEntityType.getNamedEntityString(word.getNamedEntityType())
         current = Instance(classLabel)
         self.addAttributes(current, sentence, wordIndex)
         return current
    def generateInstanceFromSentence(self, sentence: Sentence,
                                     wordIndex: int) -> Instance:
        """
        Generates a single classification instance of the morphological disambiguation problem for the given word of the
        given sentence. If the word does not have a morphological parse, the method throws InstanceNotGenerated.

        PARAMETERS
        ----------
        sentence : Sentence
            Input sentence.
        wordIndex : int
            The index of the word in the sentence.

        RETURNS
        -------
        Instance
            Classification instance.
        """
        word = sentence.getWord(wordIndex)
        if isinstance(word, AnnotatedWord):
            current = Instance(word.getParse().getTransitionList())
            for i in range(self.windowSize):
                if wordIndex - self.windowSize + i >= 0:
                    self.addAttributesForPreviousWords(
                        current, sentence, wordIndex - self.windowSize + i)
                else:
                    self.addAttributesForEmptyWords(current, "<s>")
            self.addAttributesForPreviousWords(current, sentence, wordIndex)
            return current
    def initWithFile(self, fileName: str):
        """
        Constructor for generating a new DataSet from given File.

        PARAMETERS
        ----------
        fileName : str
            File to generate DataSet from.
        """
        self.__instances = InstanceList()
        self.__definition = DataDefinition()
        inputFile = open(fileName, 'r', encoding='utf8')
        lines = inputFile.readlines()
        i = 0
        for line in lines:
            attributes = line.split(",")
            if i == 0:
                for j in range(len(attributes) - 1):
                    try:
                        float(attributes[j])
                        self.__definition.addAttribute(
                            AttributeType.CONTINUOUS)
                    except:
                        self.__definition.addAttribute(AttributeType.DISCRETE)
            else:
                if len(attributes) != self.__definition.attributeCount() + 1:
                    continue
            if ";" not in attributes[len(attributes) - 1]:
                instance = Instance(attributes[len(attributes) - 1])
            else:
                labels = attributes[len(attributes) - 1].split(";")
                instance = CompositeInstance(labels[0], None, labels)
            for j in range(len(attributes) - 1):
                if self.__definition.getAttributeType(
                        j) is AttributeType.CONTINUOUS:
                    instance.addAttribute(
                        ContinuousAttribute(float(attributes[j])))
                elif self.__definition.getAttributeType(
                        j) is AttributeType.DISCRETE:
                    instance.addAttribute(DiscreteAttribute(attributes[j]))
            if instance.attributeSize() == self.__definition.attributeCount():
                self.__instances.add(instance)
            i = i + 1
Esempio n. 4
0
    def generateInstanceFromSentence(self, sentence: Sentence, wordIndex: int) -> Instance:
        """
        Generates a single classification instance of the Shallow Parse problem for the given word of the given
        sentence. If the  word has not been labeled with shallow parse tag yet, the method returns null.

        PARAMETERS
        ----------
        sentence : Sentence
            Input sentence.
        wordIndex : int
            The index of the word in the sentence.

        RETURNS
        -------
        Instance
            Classification instance.
        """
        word = sentence.getWord(wordIndex)
        if isinstance(word, AnnotatedWord):
            classLabel = word.getShallowParse()
            current = Instance(classLabel)
            self.addAttributes(current, sentence, wordIndex)
            return current