コード例 #1
0
ファイル: CommentClassify.py プロジェクト: hejy12/TestNLP
    def getSplitData(self,node):
        """Unpack one question node into its attributes, body and comments.

        Reads the QID, QCATEGORY, QUSERID, QTYPE and QGOLD_YN attributes of
        ``node`` through DataExtrator.getAttrValue, the value of its first
        'QBody' child, and the list of its 'Comment' children.

        Returns:
            A 7-tuple ``(qid, qcategory, quserid, qtype, qgold_yn, qBody,
            commentNodeList)`` in exactly that order.
        """
        attrNames = ('QID', 'QCATEGORY', 'QUSERID', 'QTYPE', 'QGOLD_YN')
        questionId, category, userId, questionType, goldYesNo = [
            DataExtrator.getAttrValue(node, attrName) for attrName in attrNames
        ]

        # Only the first 'QBody' child is used; indexing fails if there is none.
        bodyNode = DataExtrator.getXMLNode(node, 'QBody')[0]
        questionBody = DataExtrator.getNodeValue(bodyNode)
        commentNodes = DataExtrator.getXMLNode(node, 'Comment')

        return (questionId, category, userId, questionType, goldYesNo,
                questionBody, commentNodes)
コード例 #2
0
ファイル: CommentClassify.py プロジェクト: hejy12/TestNLP
 def getTestData(self,testQtype,testQBody,testCommentNodeList,trainTopNWords):
     """Build one feature entry per test comment.

     For every node in ``testCommentNodeList`` the comment body is appended
     to ``testQBody``, passed through self.selectTopNWords, and converted
     into features against ``trainTopNWords`` with self.getFeatures.

     ``testQtype`` is kept for interface compatibility; it is not used.

     Returns:
         ``(testData, cidList)`` - two parallel lists holding, for each
         comment, its feature entry and its CID attribute value.
     """
     testData=[]
     cidList=[]
     for commentNode in testCommentNodeList:
         cid = DataExtrator.getAttrValue(commentNode, 'CID')
         cBody=DataExtrator.getNodeValue(DataExtrator.getXMLNode(commentNode, 'CBody')[0])
         # NOTE(review): the two bodies are joined with no separator, so the
         # last word of the question may fuse with the first word of the
         # comment - confirm selectTopNWords tolerates this.
         testTopNWords,_=self.selectTopNWords(testQBody+cBody)
         # The double list wrapping exists only so the existing getFeatures
         # method can be reused; the single result is unwrapped with [0][0].
         allFeatures=self.getFeatures(trainTopNWords,[[testTopNWords]])
         testData.append(allFeatures[0][0])
         cidList.append(cid)
     return testData,cidList
コード例 #3
0
ファイル: CommentClassify.py プロジェクト: hejy12/TestNLP
    def generateFeatures(self,qtype,qBody,commentNodeList):
        """Group a question's comments by gold label and build their features.

        Each comment body goes into one bucket chosen by its gold label:
        the CGOLD attribute is compared with questionMarks[0..5] when
        ``qtype`` equals the module-level ``general``, and the CGOLD_YN
        attribute with questionMarks[6..8] when ``qtype`` equals ``yesno``.
        A label matching none of those marks puts the comment in the
        "no display" bucket; comments are ignored for any other ``qtype``.

        The question body and all buckets are handed to
        self.selectTopNWords; the question's own entry is then removed from
        the returned split words before self.getFeatures is called.

        Returns:
            ``(allFeatures, topNWords)`` - the result of self.getFeatures
            and the top words returned by self.selectTopNWords.
        """
        goodComments=[]
        badComments=[]
        potentialComments=[]
        dialogueComments=[]
        nonEnglishComments=[]
        otherComments=[]
        yesComments=[]
        noComments=[]
        unsureComments=[]
        noDisplayComments=[]

        # Bucket order mirrors the gold marks questionMarks[0..5].
        generalBuckets=(goodComments,badComments,potentialComments,
                        dialogueComments,nonEnglishComments,otherComments)
        # Bucket order mirrors the gold marks questionMarks[6..8].
        yesNoBuckets=(yesComments,noComments,unsureComments)

        for node in commentNodeList:
            cBody=DataExtrator.getNodeValue(DataExtrator.getXMLNode(node, 'CBody')[0])

            if qtype == general:
                label = DataExtrator.getAttrValue(node, 'CGOLD')
                buckets, firstMark = generalBuckets, 0
            elif qtype == yesno:
                # Yes/no questions do not take ctype into account yet.
                label = DataExtrator.getAttrValue(node, 'CGOLD_YN')
                buckets, firstMark = yesNoBuckets, len(generalBuckets)
            else:
                # Neither question type: the comment is not bucketed at all.
                continue

            # First matching mark wins; marks are looked up one at a time so
            # later entries of questionMarks are only read when needed.
            for offset, bucket in enumerate(buckets):
                if label == questionMarks[firstMark+offset]:
                    bucket.append(cBody)
                    break
            else:
                # No dedicated bucket. N/A answers (presumably
                # questionMarks[9]) land here too: they get no bucket of
                # their own because they need no similarity computation.
                noDisplayComments.append(cBody)

        # Entry 0 is the question body, followed by every comment bucket.
        allList=[[qBody]]
        allList.extend(generalBuckets)
        allList.extend(yesNoBuckets)
        allList.append(noDisplayComments)

        topNWords,splitWords=self.selectTopNWords(allList)
        # Drop the question's own entry so only comment groups get features.
        splitWords.pop(0)
        allFeatures=self.getFeatures(topNWords,splitWords)
        return allFeatures,topNWords