class FeatureArrayToMatrix():
    """Fill a TermDocumentMatrix from the per-document term data of a feature array.

    The matrix is created once in the constructor; every conversion writes
    into, and returns, that same instance.
    """

    def __init__(self):
        self.termDocumentMatrix = TermDocumentMatrix()

    def convertFeatureArray(self, featureArray):
        """Capture the data held by *featureArray* and run the conversion.

        Reads ``featureArray.vecs`` (whose ``d`` mapping is keyed by document
        name) and ``featureArray.allSymbols``, stores them on the instance,
        and returns the result of :meth:`convert`.
        """
        vectors = featureArray.vecs
        self.nameDictMap = vectors
        # Freeze the document order so that index n always means the same document.
        self.docNames = list(vectors.d.keys())
        self.allSymbolsDict = featureArray.allSymbols
        return self.convert()

    def convert(self):
        """Populate the matrix from the previously captured data and return it."""
        matrix = self.termDocumentMatrix
        termCount = self._getNumberOfTerms()
        documentCount = self._getNumberOfDocuments()

        matrix.setDimensions(termCount, documentCount)
        matrix.setDocuments(self.docNames)

        for docIndex in range(documentCount):
            document = self._getNthDocument(docIndex)
            for term in document.keys():
                # A term is registered with the matrix the first time it is seen.
                if not matrix.isTermKnown(term):
                    matrix.addTerm(term)
                matrix.incCoefficient(term, docIndex, document[term])

        return matrix

    def _getNthDocument(self, n):
        """Return the entry stored under the n-th document name."""
        name = self.docNames[n]
        return self.nameDictMap.d[name]

    def _getNumberOfTerms(self):
        """Return the entry count reported by the symbol dictionary."""
        return self.allSymbolsDict.getNumberOfEntries()

    def _getNumberOfDocuments(self):
        """Return the entry count reported by the name-to-document map."""
        return self.nameDictMap.getNumberOfEntries()
예제 #2
0
class FeatureArrayToMatrix():
    """Converter that loads a feature array's documents into a TermDocumentMatrix.

    A single matrix object is built by the constructor and handed back by
    each conversion call.
    """

    def __init__(self):
        self.termDocumentMatrix = TermDocumentMatrix()

    def convertFeatureArray(self, featureArray):
        """Remember the feature array's contents, then delegate to :meth:`convert`.

        ``featureArray.vecs`` supplies the document mapping (through its ``d``
        attribute) and ``featureArray.allSymbols`` supplies the symbol table.
        """
        self.nameDictMap = featureArray.vecs
        documentKeys = self.nameDictMap.d.keys()
        # Materialise the names so documents can be addressed by position.
        self.docNames = list(documentKeys)
        self.allSymbolsDict = featureArray.allSymbols
        return self.convert()

    def convert(self):
        """Size the matrix, register the documents, and add every term value."""
        target = self.termDocumentMatrix
        totalTerms = self._getNumberOfTerms()
        totalDocs = self._getNumberOfDocuments()
        target.setDimensions(totalTerms, totalDocs)
        target.setDocuments(self.docNames)

        for position in range(totalDocs):
            entries = self._getNthDocument(position)
            for symbol in entries.keys():
                alreadyKnown = target.isTermKnown(symbol)
                if not alreadyKnown:
                    # First occurrence: the matrix must learn the term before use.
                    target.addTerm(symbol)
                target.incCoefficient(symbol, position, entries[symbol])

        return target

    def _getNthDocument(self, n):
        """Look up the document stored under the name at position *n*."""
        return self.nameDictMap.d[self.docNames[n]]

    def _getNumberOfTerms(self):
        """Ask the symbol dictionary how many entries it holds."""
        symbols = self.allSymbolsDict
        return symbols.getNumberOfEntries()

    def _getNumberOfDocuments(self):
        """Ask the name-to-document map how many entries it holds."""
        documents = self.nameDictMap
        return documents.getNumberOfEntries()
 def __init__(self):
     """Create the TermDocumentMatrix instance and keep it on this object."""
     matrix = TermDocumentMatrix()
     self.termDocumentMatrix = matrix
예제 #4
0
 def __init__(self):
     """Set up the object with a freshly constructed TermDocumentMatrix."""
     freshMatrix = TermDocumentMatrix()
     self.termDocumentMatrix = freshMatrix