def analyze(self, unknownDocument):
    '''Score unknownDocument against the trained histograms by cross entropy.

    unknownDocument is a single doc, type Document. Its absolute histogram is
    built and normalized, then compared with every known histogram held in
    self._histograms.

    The score for one known histogram is the negated sum, over every event
    that also occurs in the unknown document's histogram, of
    (known value) * log(unknown normalized value). Events that the unknown
    document does not contain are skipped.

    Returns a dict of scores:
      * mode "author":   one entry per key of self._histograms.
      * mode "document": one entry per document key found under each author
        in self._histograms.
    Any other mode yields an empty dict.
    '''
    unknownDocHistogram: dict = histograms.normalizeHistogram(
        histograms.generateAbsoluteHistogram(unknownDocument))

    def crossEntropy(knownHistogram):
        # Starts from integer 0 so a histogram with no shared events
        # scores exactly 0.
        total = 0
        for item, knownValue in knownHistogram.items():
            # Single lookup; None marks an event absent from the unknown doc.
            unknownValue = unknownDocHistogram.get(item)
            if unknownValue is not None:
                total -= knownValue * math.log(unknownValue)
        return total

    results = dict()
    if self.mode == "author":
        # self._histograms[author] is read as one histogram per author.
        for author, authorHistogram in self._histograms.items():
            results[author] = crossEntropy(authorHistogram)
    elif self.mode == "document":
        # self._histograms[author] is read as a mapping doc -> histogram.
        # The events to score come from each document's histogram, not from
        # the document key itself.
        # NOTE(review): assumes self._histograms[author] is a dict keyed by
        # document in this mode -- confirm against the training code.
        for docHistograms in self._histograms.values():
            for doc, docHistogram in docHistograms.items():
                results[doc] = crossEntropy(docHistogram)
    return results
def analyze(self, unknownDocument):
    '''Compare a normalized histogram of unknownDocument against the
    normalized known document histograms and return a dictionary of distances.

    The returned dict has one entry per key of self._authorHistograms; each
    value is self.distance.distance(unknownHistogram, knownHistogram).
    '''
    # The unknown document's histogram does not depend on the author, so it
    # is built once here rather than once per known author.
    # NOTE(review): assumes distance() does not modify its arguments, since
    # the same histogram object is now shared across all calls -- confirm.
    unknownHistogram = histograms.normalizeHistogram(
        histograms.generateAbsoluteHistogram(unknownDocument))
    return {
        author: self.distance.distance(unknownHistogram, knownHist)
        for author, knownHist in self._authorHistograms.items()
    }