def performanceForCDAITAt(noOfTweets, fileName, **stream_settings):
    """Time a SimilarStreamAggregation run over a file and evaluate it.

    The timed region covers building the user mapping from
    ``iterateTweetUsersAfterCombiningTweets(fileName, **stream_settings)``,
    constructing the aggregator, calling ``estimate()`` and collecting
    every cluster from ``iterateClusters()``.

    Args:
        noOfTweets: Forwarded unchanged to
            ``Evaluation.getEvaluationMetrics`` (presumably the number of
            tweets processed -- confirm against callers).
        fileName: Forwarded to ``iterateTweetUsersAfterCombiningTweets``.
        **stream_settings: Forwarded to the iterator; must contain the
            key ``'ssa_threshold'``, which is passed to the aggregator.

    Returns:
        Whatever ``Evaluation.getEvaluationMetrics`` returns for
        ``(noOfTweets, clusters, elapsed)``, where ``elapsed`` is the
        ``time.time()`` difference in seconds.

    Raises:
        KeyError: If ``'ssa_threshold'`` is missing from the settings.
    """
    startTime = time.time()
    userDocuments = dict(
        iterateTweetUsersAfterCombiningTweets(fileName, **stream_settings))
    aggregator = SimilarStreamAggregation(
        userDocuments, stream_settings['ssa_threshold'])
    aggregator.estimate()
    clusters = list(aggregator.iterateClusters())
    # Stop the clock only after the clusters are fully materialised.
    elapsed = time.time() - startTime
    return Evaluation.getEvaluationMetrics(noOfTweets, clusters, elapsed)
def getStatsForSSA(self):
    """Run SimilarStreamAggregation on this object's documents and score it.

    Prints the label ``SSA``, then times the following steps: building a
    dict from ``self._iterateUserDocuments()``, constructing the
    aggregator with ``self.stream_settings["ssa_threshold"]``, calling
    ``estimate()`` and listing ``iterateClusters()``.

    Returns:
        The result of ``self.getEvaluationMetrics(clusters, elapsed)``,
        where ``elapsed`` is the ``time.time()`` difference in seconds.
    """
    # Parenthesised form prints the same text under the Python 2 print
    # statement and also parses as a call on Python 3.
    print("SSA")
    startedAt = time.time()
    userDocuments = dict(self._iterateUserDocuments())
    threshold = self.stream_settings["ssa_threshold"]
    aggregator = SimilarStreamAggregation(userDocuments, threshold)
    aggregator.estimate()
    clusters = list(aggregator.iterateClusters())
    finishedAt = time.time()
    elapsed = finishedAt - startedAt
    return self.getEvaluationMetrics(clusters, elapsed)
def getStatsForSSA(self):
    """Cluster the user documents with SimilarStreamAggregation and evaluate.

    After printing ``SSA``, the method measures the ``time.time()`` span
    needed to materialise ``self._iterateUserDocuments()`` into a dict,
    build the aggregator with the ``'ssa_threshold'`` entry of
    ``self.stream_settings``, run ``estimate()`` and collect all clusters.

    Returns:
        ``self.getEvaluationMetrics(clusters, runningTime)`` -- the
        collected clusters and the measured duration in seconds are
        handed over unchanged.
    """
    # Same output as the Python 2 print statement; also valid Python 3.
    print('SSA')
    begin = time.time()
    ssa = SimilarStreamAggregation(
        dict(self._iterateUserDocuments()),
        self.stream_settings['ssa_threshold'],
    )
    ssa.estimate()
    clusters = [cluster for cluster in ssa.iterateClusters()]
    runningTime = time.time() - begin
    return self.getEvaluationMetrics(clusters, runningTime)
def performanceForCDAITAt(noOfTweets, fileName, **stream_settings):
    """Measure and evaluate one SimilarStreamAggregation pass over a file.

    Args:
        noOfTweets: Passed straight through to
            ``Evaluation.getEvaluationMetrics`` (looks like a tweet
            count -- verify with the caller).
        fileName: Input handed to ``iterateTweetUsersAfterCombiningTweets``.
        **stream_settings: Keyword settings forwarded to that iterator;
            the ``'ssa_threshold'`` entry is required and is given to
            ``SimilarStreamAggregation``.

    Returns:
        The value of ``Evaluation.getEvaluationMetrics(noOfTweets,
        clusters, duration)``, where ``duration`` is the ``time.time()``
        difference spanning input loading, ``estimate()`` and cluster
        collection.

    Raises:
        KeyError: When ``stream_settings`` has no ``'ssa_threshold'``.
    """
    tick = time.time()
    ssa = SimilarStreamAggregation(
        dict(iterateTweetUsersAfterCombiningTweets(fileName,
                                                   **stream_settings)),
        stream_settings['ssa_threshold'],
    )
    ssa.estimate()
    clusters = [cluster for cluster in ssa.iterateClusters()]
    tock = time.time()
    duration = tock - tick
    return Evaluation.getEvaluationMetrics(noOfTweets, clusters, duration)
def test_estimate(self):
    """Check the clusters produced for the ``vectors`` fixture at 0.99.

    Builds a SimilarStreamAggregation from ``vectors`` with a threshold
    of 0.99, runs ``estimate()`` and asserts that ``iterateClusters()``
    yields exactly the two expected clusters, in this order.
    """
    expectedClusters = [['1', '3', '2'], ['5', '7']]
    aggregator = SimilarStreamAggregation(vectors, 0.99)
    aggregator.estimate()
    actualClusters = list(aggregator.iterateClusters())
    # Keep (expected, actual) order so failure messages read the same.
    self.assertEqual(expectedClusters, actualClusters)