def loadTextFromFile(self, InputFile):
    """Load UTF-8 text from *InputFile*, segment it with the THULAC-style
    tokenizer held in ``self.thu``, and store the resulting token list as a
    NaturalLanguageObject in ``self._nloTextData``.

    Fix: the input file is now opened with a context manager so the handle
    is closed even if segmentation raises (the original relied on the GC).
    """
    ConsoleOutput.printGreen("Loading text data from: (" + InputFile + ")")
    # Convert to natural language object
    sentence = []
    with open(InputFile, 'r', encoding='UTF-8') as inFile:
        for line in inFile:
            # NOTE(review): assumes self.thu.cut(..., text=True) returns a
            # space-separated string of tokens (THULAC convention) -- confirm
            # against the segmenter the project actually injects.
            line = self.thu.cut(line.strip(), text=True)
            sentence.extend(line.split())
    ConsoleOutput.printGreen("Data load successful. WordCount: " + str(len(sentence)))
    self._nloTextData = NaturalLanguageObject(sentence)
def loadTextFromFile(self, InputFile):
    """Load text from *InputFile*, strip quotes, split punctuation into
    separate tokens, and store the token list as a NaturalLanguageObject in
    ``self._nloTextData``.

    Fixes: the file is now opened with an explicit UTF-8 encoding (matching
    the sibling loaders, which pass ``encoding='UTF-8'``) and is closed via
    a context manager instead of being leaked.
    """
    ConsoleOutput.printGreen("Loading text data from: (" + InputFile + ")")
    sentence = []
    # Convert to natural language object
    with open(InputFile, 'r', encoding='UTF-8') as inFile:
        for line in inFile:
            # Quote characters are removed entirely rather than tokenised.
            line = line.replace('"', '')
            line = line.replace("'", '')
            # separate punctuation from each other so they have separate tokens
            line = re.sub(r'(.)([,.!?:;"()\'\"])', r'\1 \2', line)
            # separate from both directions
            line = re.sub(r'([,.!?:;"()\'\"])(.)', r'\1 \2', line)
            sentence.extend(line.split())
    ConsoleOutput.printGreen("Data load successful. WordCount: " + str(len(sentence)))
    self._nloTextData = NaturalLanguageObject(sentence)
def loadTextFromFile_backup(self, InputFile):
    """Backup loader: like ``loadTextFromFile`` but splits *fullwidth
    (Chinese) punctuation* away from adjacent characters so each mark gets
    its own token, then stores the tokens in ``self._nloTextData``.

    Fix: the file handle is now closed via a context manager.
    """
    ConsoleOutput.printGreen("Loading text data from: (" + InputFile + ")")
    sentence = []
    # Convert to natural language object
    with open(InputFile, 'r', encoding='UTF-8') as inFile:
        for line in inFile:
            line = line.replace('"', '')
            line = line.replace("'", '')
            # re.sub(pattern, replacement, source): detach each fullwidth
            # punctuation mark from the character before it...
            line = re.sub(r'(.)([,。!?:“()‘”“’])', r'\1 \2', line)
            # ...and from the character after it, so it becomes its own token.
            line = re.sub(r'([,。!?:;”()“”‘’])(.)', r'\1 \2', line)
            sentence.extend(line.split())
    ConsoleOutput.printGreen("Data load successful. WordCount: " + str(len(sentence)))
    self._nloTextData = NaturalLanguageObject(sentence)
def Main():
    """Entry point: parse CLI flags, train the sentence-structure and
    vocabulary networks, then optionally run unit-test hooks and/or generate
    text from a seed sentence (to console or to an output file).

    Flags:
      -utss       run sentence-structure unit tests
      -utv        run vocabulary unit tests
      -td FILE    training-data location
      -ts "W..."  seed sentence (must contain _TrainRangeSS words)
      -tsc N      number of words to generate after the seed
      -of FILE    write generated text to FILE instead of the console

    Fixes vs. original: option values are no longer re-parsed as flags
    (previously e.g. the filename after -td fell through to the
    "Un-recognized console argument" error), a trailing option with no value
    no longer raises IndexError, and the output file handle is closed via
    try/finally.
    """
    _isUnitTestingSS = False
    _isUnitTestingV = False
    _recursiveInput = False
    _TrainingDataInputFile = "Datasets/Sstt.utf8.txt"
    _TestSentence = ""
    _TestSequenceGenSize = 30
    _OutputFile = None
    consoleInArgs = sys.argv[1:]
    # check input arguments
    skipNext = False  # True when the current token was consumed as an option value
    for index, val in enumerate(consoleInArgs):
        if skipNext:
            skipNext = False
            continue
        # Runs the unit testing module on initiation
        if val == "-utss":
            _isUnitTestingSS = True
        # Unit testing for the vocabulary network
        elif val == "-utv":
            _isUnitTestingV = True
        # options below consume the following token as their value
        elif val in ("-td", "-ts", "-tsc", "-of") and len(consoleInArgs) >= index + 2:
            skipNext = True
            # specify training data location
            if val == "-td":
                _TrainingDataInputFile = consoleInArgs[index + 1]
                ConsoleOutput.printGreen("Training data load locaiton changed to: \"" + _TrainingDataInputFile + "\"")
            # give a generation sentence input
            elif val == "-ts":
                _TestSentence = consoleInArgs[index + 1]
                if len(_TestSentence.split()) != _TrainRangeSS:
                    raise ValueError('Test sequence must be the same length as the vector training size. (' + str(_TrainRangeSS) + ')')
            # set the amount of words generated after input
            elif val == "-tsc":
                _TestSequenceGenSize = int(consoleInArgs[index + 1])
                ConsoleOutput.printGreen("Test sequence generation size changed to: " + str(_TestSequenceGenSize))
            # set the output file for the generated data to be printed to
            elif val == "-of":
                _OutputFile = str(consoleInArgs[index + 1])
                ConsoleOutput.printGreen("Output generation location changed to: (" + consoleInArgs[index + 1] + ")")
        else:
            raise ValueError('Un-recognized console argument: ' + str(val))
    # Initialise colorama cross-platform console logging
    init()
    MLNetworkSS = NNSentenceStructure()
    MLNetworkV = NNVocabulary()
    # Network trainer converts text data into normalized vectors that
    # can be passed into the networks
    networkTrainer = NetworkTrainer(_TrainRangeSS, _TrainRangeV)
    networkTrainer.loadTextFromFile(_TrainingDataInputFile)
    # Trainer parses the structure into vector normal arrays of size (_TrainRangeSS);
    # the next word of the sequence is used as the target, example
    # ["Harry", "sat", "on", "his"] - ["broomstick"] <-- target
    networkTrainer.loadSentenceStructureNormals()
    networkTrainer.loadVocabularyNormals(MLNetworkV)
    # Pass the vectors into the network
    MLNetworkSS.loadVectorsIntoNetwork(networkTrainer._TrainingSequenceSS,
                                       networkTrainer._TrainingTargetsSS)
    # Fit data
    MLNetworkSS.FitNetwork()
    MLNetworkV.FitNetwork()
    # Use console argument "-utss" to activate testing (unit-test calls are
    # currently stubbed out to debug prints in this version).
    uTester = None
    if _isUnitTestingSS:
        print("_isUnitTestingSS is true")
    # use console argument "-utv" to activate
    if _isUnitTestingV:
        print("_isUnitTestingV is true")
    if _TestSentence != "":
        print("_TestSentence is true")
        printToFile = False
        f = None
        # user has specified output location
        if _OutputFile is not None:
            printToFile = True
            f = open(_OutputFile, 'w')
        try:
            genSize = _TestSequenceGenSize  # number of words to generate
            initialInput = _TestSentence
            if printToFile:
                f.write(initialInput + " ")
            else:
                print(initialInput + " ", end="")
            initialInput = initialInput.split()  # split the seed sentence into words
            # generate a sentence of genSize
            for index in range(0, genSize):
                nlo = NaturalLanguageObject(initialInput)
                # Work around ambiguous Chinese word segmentation: drop leading
                # tokens so the test vector's dimension matches the training
                # dimension, otherwise the KNN classifier raises an error.
                diff = len(nlo.sentenceNormalised) - _TrainRangeSS
                if diff > 0:
                    nlo.sentenceNormalised = nlo.sentenceNormalised[diff:]
                # since nlo will always be the right size, we can use that variable
                predToke = MLNetworkSS.getPrediction([nlo.sentenceNormalised])
                nextToke = nlo.tokeniseNormals([predToke])
                # now we have the next toke in the sentence, convert that to word
                word = MLNetworkV.getPredictedWord(nlo.sentenceNormalised[-1], nextToke[0])
                # decide whether to print to file or console
                if printToFile:
                    f.write(str(word) + " ")
                else:
                    print(str(word) + " ", end="")
                initialInput.append(word)
                # maintain a size of 'genSize'
                del initialInput[0]
        finally:
            # Fix: the output file was never closed in the original.
            if f is not None:
                f.close()
    print("\n")
    # Reset console back to original state
    deinit()
class UnitTester:
    """Exercises the trained networks against a reference paragraph and
    buckets each prediction into passed / non-fatal / failed lists, which
    are then printed with colour-coded console output."""

    # Injected by __init__: the sentence-structure and vocabulary networks.
    neuralNetworkSS = None
    neuralNetworkV = None
    # Window sizes; overwritten by __init__ with the caller's training ranges.
    VectorSizeSS = 3
    VectorSizeV = 1
    # Reference paragraph (module-level constant) and its parsed form,
    # built once at class-definition time.
    _TestingPara = testingParaHarryPotter
    _TestingParaNlo = NaturalLanguageObject(_TestingPara)

    def TestVocabulary(self):
        """For each word of the test paragraph, ask the vocabulary network to
        predict it from the previous word's normalised tag, then bucket the
        result by correctness and prediction probability
        (< 0.2 fail, > 0.6 pass, otherwise non-fatal)."""
        #testingPara = testingParaHarryPotter
        testingPara = self._TestingPara
        passedTests = []
        nonFatalTests = []
        failedTests = []
        # Build a test sequence form each word
        for index, val in enumerate(self._TestingParaNlo.sentenceTokenList[1:]):
            # NOTE(review): enumerate over [1:] starts index at 0, so
            # [index - 1] is -1 on the first pass (i.e. the *last* element)
            # and thereafter lags val by two positions; likewise
            # sentenceNormalised[index] is the element *before* val.
            # Looks like an off-by-one -- confirm before relying on it.
            prevWord = self._TestingParaNlo.sentenceTokenList[index - 1][0]
            prevWordToken = self._TestingParaNlo.sentenceTokenList[index - 1][1]
            prevWordTokenNormal = self._TestingParaNlo.sentenceNormalised[index - 1]
            curWord = val[0]
            curToken = val[1]
            curNormal = self._TestingParaNlo.sentenceNormalised[index]
            prediction = self.neuralNetworkV.getPredictedWord(
                prevWordTokenNormal, curToken)
            probList = self.neuralNetworkV.getPredictionProbability(
                prevWordTokenNormal, curToken)
            # Take the highest class probability. (This inner loop reuses the
            # name 'val'; harmless because the outer loop reassigns it.)
            prob = 0
            for val in probList[0]:
                if (val > prob):
                    prob = val
            if (str(curWord.lower()) == str(prediction).lower()):
                passedTests.append("(" + str(prevWord) + ", " + str(prevWordToken) +
                                   ") Target: " + str(curWord) + " Pred: " + str(prediction) +
                                   " " + str(prob * 100) + "%")
            else:
                # Misprediction: low confidence is a hard fail, high confidence
                # still counts as a pass, the middle band is non-fatal.
                if (prob < 0.2):
                    failedTests.append("(" + str(prevWord) + ", " + str(prevWordToken) +
                                       ") Target: " + str(curWord) + " Pred: " + str(prediction) +
                                       " " + str(prob * 100) + "%")
                elif (prob > 0.6):
                    passedTests.append("(" + str(prevWord) + ", " + str(prevWordToken) +
                                       ") Target: " + str(curWord) + " Pred: " + str(prediction) +
                                       " " + str(prob * 100) + "%")
                else:
                    nonFatalTests.append("(" + str(prevWord) + ", " + str(prevWordToken) +
                                         ") Target: " + str(curWord) + " Pred: " + str(prediction) +
                                         " " + str(prob * 100) + "%")
        # print results
        print("\n")
        # NOTE(review): banner says TestSentenceStructuring() but this is
        # TestVocabulary() -- copy/paste artifact; runtime text left as-is.
        print("********** TestSentenceStructuring() **********")
        print("\n")
        ConsoleOutput.printUnderline("Failed Tests: (" + str(len(failedTests)) + "/" +
                                     str(len(testingPara)) + ")")
        for val in failedTests:
            ConsoleOutput.printRed(val)
        print("\n")
        ConsoleOutput.printUnderline("Non-Fatal failed Tests: (" + str(len(nonFatalTests)) + "/" +
                                     str(len(testingPara)) + ")")
        for val in nonFatalTests:
            ConsoleOutput.printYellow(val)
        print("\n")
        ConsoleOutput.printUnderline("Passed Tests: (" + str(len(passedTests)) + "/" +
                                     str(len(testingPara)) + ")")
        for val in passedTests:
            ConsoleOutput.printGreen(val)
        print("\n")
        ConsoleOutput.printYellow("Passed: " + str(len(passedTests)) +
                                  " Non-Fatals: " + str(len(nonFatalTests)) +
                                  " Fails: " + str(len(failedTests)))
        print("\n")

    def TestSentenceStructuring(self):
        """Slide a VectorSizeSS-word window over the test paragraph, predict
        the tag of the following word with the sentence-structure network,
        bucket each prediction by probability (< 0.3 fail, > 0.6 pass,
        otherwise non-fatal), and finally report overall accuracy."""
        #testingPara = testingParaHarryPotter
        testingPara = self._TestingPara
        passedTests = []
        nonFatalTests = []
        failedTests = []
        # used to predict accuracy of the network
        acTestPred = []
        acTestTrue = []
        # Build a test sequence form each word
        for index, val in enumerate(testingPara):
            tmpTestSeq = []
            target = None
            # grab the next 3 words after (skip windows that would run off the end)
            if (index < len(testingPara) - (self.VectorSizeSS + 1)):
                for index2 in range(0, self.VectorSizeSS):
                    tmpTestSeq.append(testingPara[index + index2])
                target = testingPara[index + self.VectorSizeSS]
                # convert to natural language object
                nloTester = NaturalLanguageObject(tmpTestSeq)
                nloTarget = NaturalLanguageObject([target])
                # get nerual network prediction
                normalPred = self.neuralNetworkSS.getPrediction(
                    nloTester.sentenceNormalised)
                prediction = str(nloTester.tokeniseNormals([normalPred]))
                comp = str(nloTarget.sentenceTags)
                cTrue = nloTarget.sentenceNormalised[0]
                # Scale normals so the later int cast for accuracy_score
                # preserves some resolution.
                acTestTrue.append(cTrue * 100)
                acTestPred.append(normalPred * 100)
                #if first letters match, this means 'NN' will match with 'NNS'
                # (prediction/comp are str() of lists, so [2] is the first
                # character of the first tag)
                if (prediction[2] == comp[2]):
                    #filter for probability
                    probList = self.neuralNetworkSS.getPredictionProbability(
                        nloTester.sentenceNormalised)
                    prob = 0
                    for val in probList[0]:
                        if (val > prob):
                            prob = val
                    passedTests.append(
                        str(nloTester.sentenceTokenList) + " Target: " +
                        str(nloTarget.sentenceTokenList) + " Prediction: " +
                        prediction + " " + str(prob * 100) + "%")
                else:
                    probList = self.neuralNetworkSS.getPredictionProbability(
                        nloTester.sentenceNormalised)
                    prob = 0
                    for val in probList[0]:
                        if (val > prob):
                            prob = val
                    # if accuracy s less than 30% add to failed list
                    if (prob < 0.3):
                        failedTests.append(
                            str(nloTester.sentenceTokenList) + " Target: " +
                            str(nloTarget.sentenceTokenList) + " Prediction: " +
                            prediction + " " + str(prob * 100) + "%")
                    else:
                        # if probability is more than 60% its probably passed
                        if (prob > 0.6):
                            passedTests.append(
                                str(nloTester.sentenceTokenList) + " Target: " +
                                str(nloTarget.sentenceTokenList) + " Prediction: " +
                                prediction + " " + str(prob * 100) + "%")
                        else:
                            nonFatalTests.append(
                                str(nloTester.sentenceTokenList) + " Target: " +
                                str(nloTarget.sentenceTokenList) + " Prediction: " +
                                prediction + " " + str(prob * 100) + "%")
        # print results
        print("\n")
        print("********** TestSentenceStructuring() **********")
        print("\n")
        ConsoleOutput.printUnderline("Failed Tests: (" + str(len(failedTests)) + "/" +
                                     str(len(testingPara)) + ")")
        for val in failedTests:
            ConsoleOutput.printRed(val)
        print("\n")
        ConsoleOutput.printUnderline("Non-Fatal failed Tests: (" + str(len(nonFatalTests)) + "/" +
                                     str(len(testingPara)) + ")")
        for val in nonFatalTests:
            ConsoleOutput.printYellow(val)
        print("\n")
        ConsoleOutput.printUnderline("Passed Tests: (" + str(len(passedTests)) + "/" +
                                     str(len(testingPara)) + ")")
        for val in passedTests:
            ConsoleOutput.printGreen(val)
        print("\n")
        # Compare int-scaled true vs predicted normals for a rough accuracy.
        nnAccuracy = accuracy_score(
            np.array(acTestTrue).astype(int), np.array(acTestPred).astype(int))
        ConsoleOutput.printYellow("Passed: " + str(len(passedTests)) +
                                  " Non-Fatals: " + str(len(nonFatalTests)) +
                                  " Fails: " + str(len(failedTests)))
        ConsoleOutput.printYellow("NeuralNetork accuracy: " +
                                  str(round(nnAccuracy * 100, 1)) + "%")
        print("\n")

    def __init__(self, inNeuralNetworkSS, inNeuralNetworkV, inVectorSizeSS,
                 inVectorSizeV):
        """Store references to the two trained networks and the vector sizes
        used when building test windows."""
        self.neuralNetworkSS = inNeuralNetworkSS
        self.neuralNetworkV = inNeuralNetworkV
        self.VectorSizeSS = inVectorSizeSS
        self.VectorSizeV = inVectorSizeV
def Main():
    """Entry point: parse CLI flags, train both networks, optionally run the
    UnitTester suites, and optionally generate text from a seed sentence.

    Fixes vs. original: option values (e.g. the filename after -td) are
    skipped by the argument loop instead of being re-parsed as unrecognised
    flags, a trailing option with no value no longer raises IndexError, and
    ``uTester == None`` is replaced with the identity check ``is None``.
    """
    _isUnitTestingSS = False
    _isUnitTestingV = False
    _recursiveInput = False
    _TrainingDataInputFile = "Datasets/HarryPotter(xxlarge).txt"
    _TestSentence = ""
    _TestSequenceGenSize = 30
    consoleInArgs = sys.argv[1:]
    # check input arguments
    skipNext = False  # True when the current token was consumed as an option value
    for index, val in enumerate(consoleInArgs):
        if skipNext:
            skipNext = False
            continue
        # Runs the unit testing module on initiation
        if (val == "-utss"):
            _isUnitTestingSS = True
        # Unit testing for the vocabulary network
        elif (val == "-utv"):
            _isUnitTestingV = True
        # options below consume the following token as their value
        elif (val in ("-td", "-ts", "-tsc") and len(consoleInArgs) >= index + 2):
            skipNext = True
            # specify training data location
            if (val == "-td"):
                _TrainingDataInputFile = consoleInArgs[index + 1]
                ConsoleOutput.printGreen(
                    "Training data load locaiton changed to: \"" +
                    _TrainingDataInputFile + "\"")
            # give a generation sentence input
            elif (val == "-ts"):
                _TestSentence = consoleInArgs[index + 1]
                if (len(_TestSentence.split()) != _TrainRangeSS):
                    raise ValueError(
                        'Test sequence must be the same length as the vector training size. ('
                        + str(_TrainRangeSS) + ')')
            # set the amount of words generated after input
            elif (val == "-tsc"):
                _TestSequenceGenSize = int(consoleInArgs[index + 1])
        else:
            raise ValueError('Un-recognized console argument: ' + str(val))
    # Initialise colorama cross-platform console logging
    init()
    MLNetworkSS = NNSentenceStructure()
    MLNetworkV = NNVocabulary()
    # Network trainer converts text data into normalized vectors that
    # can be passed into the networks
    networkTrainer = NetworkTrainer(_TrainRangeSS, _TrainRangeV)
    networkTrainer.loadTextFromFile(_TrainingDataInputFile)
    # Trainer parses the structure into vector normal arrays of size (_TrainRangeSS);
    # the next word of the sequence is used as the target, example
    # ["Harry", "sat", "on", "his"] - ["broomstick"] <-- target
    networkTrainer.loadSentenceStructureNormals()
    networkTrainer.loadVocabularyNormals(MLNetworkV)
    # Pass the vectors into the network
    MLNetworkSS.loadVectorsIntoNetwork(networkTrainer._TrainingSequenceSS,
                                       networkTrainer._TrainingTargetsSS)
    # Fit data
    MLNetworkSS.FitNetwork()
    MLNetworkV.FitNetwork()
    # Use console argument "-utss" to activate testing
    uTester = None
    if (_isUnitTestingSS):
        if (uTester is None):
            uTester = UnitTester(MLNetworkSS, MLNetworkV, _TrainRangeSS,
                                 _TrainRangeV)
        uTester.TestSentenceStructuring()
    # use console argument "-utv" to activate
    if (_isUnitTestingV):
        if (uTester is None):
            uTester = UnitTester(MLNetworkSS, MLNetworkV, _TrainRangeSS,
                                 _TrainRangeV)
        uTester.TestVocabulary()
    if (_TestSentence != ""):
        genSize = _TestSequenceGenSize
        initialInput = _TestSentence
        print(initialInput + " ", end="")
        initialInput = initialInput.split()
        # generate a sentence of genSize
        for index in range(0, genSize):
            nlo = NaturalLanguageObject(initialInput)
            # since nlo will always be the right size, we can use that variable
            predToke = MLNetworkSS.getPrediction(nlo.sentenceNormalised)
            nextToke = nlo.tokeniseNormals([predToke])
            # now we have the next toke in the sentence, convert that to word
            word = MLNetworkV.getPredictedWord(nlo.sentenceNormalised[-1],
                                               nextToke[0])
            print(str(word) + " ", end="")
            initialInput.append(word)
            # maintain a size of 'genSize'
            del initialInput[0]
    print("\n")
    # Reset console back to original state
    deinit()
def Main():
    """Entry point (earlier revision using neuralNetworkSS/neuralNetworkV
    names): parse CLI flags, train both networks, optionally run the
    UnitTester suites, and optionally generate text from a seed sentence.

    Fixes vs. original: option values (e.g. the filename after -td) are
    skipped by the argument loop instead of being re-parsed as unrecognised
    flags, a trailing option with no value no longer raises IndexError, and
    ``uTester == None`` is replaced with the identity check ``is None``.
    """
    _isUnitTestingSS = False
    _isUnitTestingV = False
    _recursiveInput = False
    _TrainingDataInputFile = "Datasets/HarryPotter(xxlarge).txt"
    _TestSentence = ""
    _TestSequenceGenSize = 30
    consoleInArgs = sys.argv[1:]
    # check input arguments
    skipNext = False  # True when the current token was consumed as an option value
    for index, val in enumerate(consoleInArgs):
        if skipNext:
            skipNext = False
            continue
        # Runs the unit testing module on initiation
        if(val == "-utss"):
            _isUnitTestingSS = True
        # Unit testing for the vocabulary network
        elif(val == "-utv"):
            _isUnitTestingV = True
        # options below consume the following token as their value
        elif(val in ("-td", "-ts", "-tsc") and len(consoleInArgs) >= index+2):
            skipNext = True
            # specify training data location
            if(val == "-td"):
                _TrainingDataInputFile = consoleInArgs[index+1]
                ConsoleOutput.printGreen("Training data load locaiton changed to: \"" + _TrainingDataInputFile + "\"")
            # give a generation sentence input
            elif(val == "-ts"):
                _TestSentence = consoleInArgs[index+1]
                if(len(_TestSentence.split()) != _TrainRangeSS):
                    raise ValueError('Test sequence must be the same length as the vector training size. (' + str(_TrainRangeSS) + ')')
            # set the amount of words generated after input
            elif(val == "-tsc"):
                _TestSequenceGenSize = int(consoleInArgs[index+1])
        else:
            raise ValueError('Un-recognized console argument: ' + str(val))
    # Initialise colorama cross-platform console logging
    init()
    neuralNetworkSS = NNSentenceStructure()
    neuralNetworkV = NNVocabulary()
    # Network trainer converts text data into normalized vectors that
    # can be passed into the networks
    networkTrainer = NetworkTrainer(_TrainRangeSS, _TrainRangeV)
    networkTrainer.loadTextFromFile(_TrainingDataInputFile)
    # Trainer parses the structure into vector normal arrays of size (_TrainRangeSS);
    # the next word of the sequence is used as the target, example
    # ["Harry", "sat", "on", "his"] - ["broomstick"] <-- target
    networkTrainer.loadSentenceStructureNormals()
    networkTrainer.loadVocabularyNormals(neuralNetworkV)
    # Pass the vectors into the network
    neuralNetworkSS.loadVectorsIntoNetwork(networkTrainer._TrainingSequenceSS,
                                           networkTrainer._TrainingTargetsSS)
    # Fit data
    neuralNetworkSS.FitNetwork()
    neuralNetworkV.FitNetwork()
    # Use console argument "-utss" to activate testing
    uTester = None
    if(_isUnitTestingSS):
        if(uTester is None):
            uTester = UnitTester(neuralNetworkSS, neuralNetworkV, _TrainRangeSS, _TrainRangeV)
        uTester.TestSentenceStructuring()
    # use console argument "-utv" to activate
    if(_isUnitTestingV):
        if(uTester is None):
            uTester = UnitTester(neuralNetworkSS, neuralNetworkV, _TrainRangeSS, _TrainRangeV)
        uTester.TestVocabulary()
    if(_TestSentence != ""):
        genSize = _TestSequenceGenSize
        initialInput = _TestSentence
        print(initialInput + " ", end="")
        initialInput = initialInput.split()
        # generate a sentence of genSize
        for index in range(0, genSize):
            nlo = NaturalLanguageObject(initialInput)
            # since nlo will always be the right size, we can use that variable
            predToke = neuralNetworkSS.getPrediction(nlo.sentenceNormalised)
            nextToke = nlo.tokeniseNormals([predToke])
            # now we have the next toke in the sentence, convert that to word
            word = neuralNetworkV.getPredictedWord(nlo.sentenceNormalised[-1], nextToke[0])
            print(str(word) + " ", end="")
            initialInput.append(word)
            # maintain a size of 'genSize'
            del initialInput[0]
    print("\n")
    # Reset console back to original state
    deinit()
def TestSentenceStructuring(self):
    """Slide a VectorSizeSS-word window over the test paragraph, predict the
    tag of the following word with the sentence-structure network, bucket
    each prediction by probability (< 0.3 fail, > 0.6 pass, otherwise
    non-fatal), and finally report overall accuracy."""
    #testingPara = testingParaHarryPotter
    testingPara = self._TestingPara
    passedTests = []
    nonFatalTests = []
    failedTests = []
    # used to predict accuracy of the network
    acTestPred = []
    acTestTrue = []
    # Build a test sequence form each word
    for index, val in enumerate(testingPara):
        tmpTestSeq = []
        target = None
        # grab the next 3 words after (skip windows that would run off the end)
        if(index < len(testingPara)-(self.VectorSizeSS+1)):
            for index2 in range(0, self.VectorSizeSS):
                tmpTestSeq.append(testingPara[index+index2])
            target = testingPara[index+self.VectorSizeSS]
            # convert to natural language object
            nloTester = NaturalLanguageObject(tmpTestSeq)
            nloTarget = NaturalLanguageObject([target])
            # get nerual network prediction
            normalPred = self.neuralNetworkSS.getPrediction(nloTester.sentenceNormalised)
            prediction = str(nloTester.tokeniseNormals([normalPred]))
            comp = str(nloTarget.sentenceTags)
            cTrue = nloTarget.sentenceNormalised[0]
            # Scale normals so the later int cast for accuracy_score
            # preserves some resolution.
            acTestTrue.append(cTrue*100)
            acTestPred.append(normalPred*100)
            #if first letters match, this means 'NN' will match with 'NNS'
            # (prediction/comp are str() of lists, so [2] is the first
            # character of the first tag)
            if(prediction[2] == comp[2]):
                #filter for probability
                probList = self.neuralNetworkSS.getPredictionProbability(nloTester.sentenceNormalised)
                prob = 0
                for val in probList[0]:
                    if(val > prob):
                        prob = val
                passedTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                   str(nloTarget.sentenceTokenList) + " Prediction: " +
                                   prediction + " " +str(prob*100) + "%")
            else:
                probList = self.neuralNetworkSS.getPredictionProbability(nloTester.sentenceNormalised)
                prob = 0
                for val in probList[0]:
                    if(val > prob):
                        prob = val
                # if accuracy s less than 30% add to failed list
                if(prob < 0.3):
                    failedTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                       str(nloTarget.sentenceTokenList) + " Prediction: " +
                                       prediction + " " +str(prob*100) + "%")
                else:
                    # if probability is more than 60% its probably passed
                    if(prob > 0.6):
                        passedTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                           str(nloTarget.sentenceTokenList) + " Prediction: " +
                                           prediction + " " +str(prob*100) + "%")
                    else:
                        nonFatalTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                             str(nloTarget.sentenceTokenList) + " Prediction: " +
                                             prediction + " " +str(prob*100) + "%")
    # print results
    print("\n")
    print("********** TestSentenceStructuring() **********")
    print("\n")
    ConsoleOutput.printUnderline("Failed Tests: (" + str(len(failedTests)) + "/" +
                                 str(len(testingPara)) + ")")
    for val in failedTests:
        ConsoleOutput.printRed(val)
    print("\n")
    ConsoleOutput.printUnderline("Non-Fatal failed Tests: (" + str(len(nonFatalTests)) + "/" +
                                 str(len(testingPara)) + ")")
    for val in nonFatalTests:
        ConsoleOutput.printYellow(val)
    print("\n")
    ConsoleOutput.printUnderline("Passed Tests: (" + str(len(passedTests)) + "/" +
                                 str(len(testingPara)) + ")")
    for val in passedTests:
        ConsoleOutput.printGreen(val)
    print("\n")
    # Compare int-scaled true vs predicted normals for a rough accuracy.
    nnAccuracy = accuracy_score(np.array(acTestTrue).astype(int),
                                np.array(acTestPred).astype(int))
    ConsoleOutput.printYellow("Passed: " + str(len(passedTests)) +
                              " Non-Fatals: " + str(len(nonFatalTests)) +
                              " Fails: " + str(len(failedTests)))
    ConsoleOutput.printYellow("NeuralNetork accuracy: " + str(round(nnAccuracy*100,1)) + "%")
    print("\n")