예제 #1
0
    def analyze(self, unknownDocument):
        """Score an unknown document against the stored histograms.

        The unknown document is converted to a normalized histogram with
        ``histograms.generateAbsoluteHistogram`` followed by
        ``histograms.normalizeHistogram``.  Every stored histogram is then
        scored as ``-sum(known[item] * log(unknown[item]))``, where the sum
        runs only over items that are present in both histograms.

        Args:
            unknownDocument: A single document (type ``Document``).

        Returns:
            dict: In ``"author"`` mode, a mapping from each author key of
            ``self._histograms`` to its score.  In ``"document"`` mode, a
            mapping from each document key to its score (if the same
            document key occurs under several authors, the last one wins).
            For any other mode the dict is empty.
        """
        unknownDocHistogram: dict = histograms.normalizeHistogram(
            histograms.generateAbsoluteHistogram(unknownDocument))

        def score(knownHistogram):
            # Accumulate -known * log(unknown) over the shared items only;
            # items missing from the unknown document are skipped.
            # NOTE(review): math.log raises ValueError for a value <= 0 --
            # presumably the normalized histogram holds only positive
            # frequencies; confirm against histograms.normalizeHistogram.
            total = 0
            for item, weight in knownHistogram.items():
                unknownValue = unknownDocHistogram.get(item)
                if unknownValue is not None:
                    total -= weight * math.log(unknownValue)
            return total

        results = {}
        if self.mode == "author":
            # self._histograms maps author -> (mean) histogram.
            for author, authorHistogram in self._histograms.items():
                results[author] = score(authorHistogram)

        elif self.mode == "document":
            # self._histograms maps author -> {document -> histogram}.
            for docHistograms in self._histograms.values():
                for doc, docHistogram in docHistograms.items():
                    # Iterate the document's histogram, not the document
                    # key itself (the key is only the label for the result).
                    results[doc] = score(docHistogram)

        return results
예제 #2
0
 def analyze(self, unknownDocument):
     """Return the distance from unknownDocument to every known author.

     The unknown document is converted to a normalized histogram with
     ``histograms.generateAbsoluteHistogram`` followed by
     ``histograms.normalizeHistogram`` and compared against each entry of
     ``self._authorHistograms`` using ``self.distance.distance``.

     Args:
         unknownDocument: The document to compare against the known
             author histograms.

     Returns:
         dict: Maps each author key of ``self._authorHistograms`` to the
         value returned by ``self.distance.distance(unknown, known)``.
     """
     # The unknown document's histogram does not depend on the author, so
     # build it once instead of once per author.
     # NOTE(review): assumes self.distance.distance does not mutate its
     # arguments -- confirm against the distance implementation.
     unknownHist = histograms.normalizeHistogram(
         histograms.generateAbsoluteHistogram(unknownDocument))
     return {
         author: self.distance.distance(unknownHist, knownHist)
         for author, knownHist in self._authorHistograms.items()
     }