Esempio n. 1
0
 def GetAnswer(self, event):
     """Show, in a message box, the stored answer that best fits the user's text.

     The text read from ``self.text_user`` is segmented with jieba, run
     through the vectorizer and the PCA transformer, and handed to the
     classifier. The first predicted value is used as the column index into
     both ``sheetForQuestion`` and ``sheetForAnswer``. Every question in that
     column is compared with the raw user text via
     ``difflib.SequenceMatcher.quick_ratio``; the answer at the position of
     the first highest-scoring question is displayed.

     Args:
         event: The event that triggered the handler; it is not used.
     """
     # Build the model pieces: classifier, vectorizer, PCA transformer.
     # NOTE(review): the vectorizer and transformer are taken from index 1 of
     # whatever the helper calls return -- confirm the layout of those results.
     model = SVM.DoSVM()
     text_vectorizer = KeywordProcessor.DataTransformer()[1]
     reducer = PCADataProcessor.PCADataProcessor()[1]

     # Raw text typed by the user; also the reference for the similarity step.
     user_text = self.text_user.GetValue()

     # Segment the text and wrap it in a one-element batch for the vectorizer.
     batch = [" ".join(jieba.cut_for_search(user_text))]

     # Vectorize, densify, compress, then classify.
     dense_features = text_vectorizer.transform(batch).toarray()
     predicted = model.predict(reducer.transform(dense_features))

     # The first prediction selects the column in both sheets.
     candidate_questions = sheetForQuestion.col_values(predicted[0])
     candidate_answers = sheetForAnswer.col_values(predicted[0])

     # Score every stored question against the raw user text.
     scores = [
         difflib.SequenceMatcher(None, user_text, question).quick_ratio()
         for question in candidate_questions
     ]

     # Position of the first highest score (ties resolve to the earliest one).
     best_position = max(range(len(scores)), key=scores.__getitem__)

     # Output the answer.
     wx.MessageBox(candidate_answers[best_position])
def CVTestingTraining():
    """Classify 120 rows of a user-selected sheet column and return the labels.

    The user is prompted for a column index ('1' for the CV set, '2' for the
    testing set). The values of that column in sheet rows 1 through 120 are
    read, each one is segmented with jieba, vectorized, densified, compressed
    with the PCA transformer and passed to the classifier.

    The values read are also appended to the module-level ``data`` list, as
    before. Predictions, however, are computed from the values read in *this*
    call rather than from ``data[0:120]``, so a ``data`` list that already
    holds entries (for example from an earlier call) no longer causes stale
    rows to be classified.

    Returns:
        list: The first prediction returned by the classifier for each of the
        120 rows, in row order.

    Raises:
        ValueError: If the text entered at the prompt is not an integer.
    """
    # Get the user's choice of column.
    operation = int(
        input(
            "Please choose an operation('1' for CV set, '2' for Testing set): "
        ))

    # Read the chosen column for rows 1..120.
    column_values = []
    for index in range(1, 121):
        value = sheet.row(index)[operation].value
        column_values.append(value)
        # Keep feeding the shared list so any other reader of ``data`` still
        # sees these values.
        data.append(value)

    # Get the classifier, the vectorizer and the PCA transformer.
    # NOTE(review): the vectorizer and transformer are taken from index 1 of
    # whatever the helper calls return -- confirm the layout of those results.
    classifier = SVM.DoSVM()
    vectorizer = KeywordProcessor.DataTransformer()[1]
    pcaTransform = PCADataProcessor.PCADataProcessor()[1]

    prediction = []
    for text in column_values:
        # Segment the string and wrap it in a one-element batch.
        segmented = [" ".join(jieba.cut_for_search(text))]
        # Vectorize, densify and compress the single sample.
        dataMatrix = vectorizer.transform(segmented).toarray()
        newData = pcaTransform.transform(dataMatrix)
        # Keep the prediction for this sample.
        prediction.append(classifier.predict(newData)[0])

    # Return the predictions.
    return prediction


# Test the function.
#print(CVTestingTraining())