def performanceForCDAAt(noOfTweets, fileName, **stream_settings):
     """Cluster the tweets in ``fileName`` and return evaluation metrics.

     An ``HDStreaminClustering`` instance is built from ``stream_settings``
     and fed the iterator returned by ``TwitterIterators.iterateFromFile``.
     Only the ``cluster`` call is timed; constructing the clusterer and
     filtering the resulting clusters fall outside the measured interval.

     Clusters whose ``documentsInCluster`` holds fewer entries than
     ``stream_settings['cluster_filter_threshold']`` are dropped; each
     remaining cluster is represented by its document keys.

     Returns whatever ``Evaluation.getEvaluationMetrics`` returns for
     ``noOfTweets``, the kept clusters and the elapsed time in seconds.
     """
     streamClusterer = HDStreaminClustering(**stream_settings)

     startTime = time.time()
     streamClusterer.cluster(TwitterIterators.iterateFromFile(fileName))
     endTime = time.time()

     keptClusters = []
     for _, candidate in streamClusterer.clusters.iteritems():
         documentIds = candidate.documentsInCluster.keys()
         # The threshold is looked up per cluster, so a missing setting only
         # raises when there is at least one cluster to examine.
         if len(documentIds) >= stream_settings['cluster_filter_threshold']:
             keptClusters.append(documentIds)

     elapsed = endTime - startTime
     return Evaluation.getEvaluationMetrics(noOfTweets, keptClusters, elapsed)
 def performanceForCDAITAt(noOfTweets, fileName, **stream_settings):
     """Run ``SimilarStreamAggregation`` on ``fileName`` and return metrics.

     The timed interval covers everything from building the input dict
     (out of the pairs yielded by ``iterateTweetUsersAfterCombiningTweets``)
     through ``estimate()`` to materialising the clusters as a list.

     Returns whatever ``Evaluation.getEvaluationMetrics`` returns for
     ``noOfTweets``, the cluster list and the elapsed time in seconds.
     """
     startTime = time.time()

     # Build the constructor arguments in the same order they were evaluated
     # when written as a single nested call.
     combinedStreams = dict(
         iterateTweetUsersAfterCombiningTweets(fileName, **stream_settings))
     aggregationThreshold = stream_settings['ssa_threshold']
     aggregator = SimilarStreamAggregation(combinedStreams,
                                           aggregationThreshold)

     aggregator.estimate()
     clusterList = list(aggregator.iterateClusters())

     elapsed = time.time() - startTime
     return Evaluation.getEvaluationMetrics(noOfTweets, clusterList, elapsed)
Ejemplo n.º 3
0
 def performanceForCDAITAt(noOfTweets, fileName, **stream_settings):
     """Time a ``SimilarStreamAggregation`` run and evaluate its clusters.

     Input preparation, construction of the aggregation object,
     ``estimate()`` and the collection of ``iterateClusters()`` into a list
     are all included in the measured duration.

     ``stream_settings`` is forwarded to
     ``iterateTweetUsersAfterCombiningTweets`` and must contain
     ``'ssa_threshold'``, which is passed as the second constructor argument.

     Returns the result of ``Evaluation.getEvaluationMetrics`` called with
     ``noOfTweets``, the clusters and the elapsed seconds.
     """
     began = time.time()

     streamPairs = iterateTweetUsersAfterCombiningTweets(fileName,
                                                         **stream_settings)
     streamsByKey = dict(streamPairs)
     ssa = SimilarStreamAggregation(streamsByKey,
                                    stream_settings['ssa_threshold'])
     ssa.estimate()
     foundClusters = list(ssa.iterateClusters())

     finished = time.time()
     duration = finished - began
     return Evaluation.getEvaluationMetrics(noOfTweets, foundClusters,
                                            duration)
Ejemplo n.º 4
0
 def performanceForCDAAt(noOfTweets, fileName, **stream_settings):
     """Time ``HDStreaminClustering`` on ``fileName`` and evaluate the result.

     The clusterer is created from ``stream_settings`` before the clock
     starts, so the measured duration is that of the ``cluster`` call alone.
     Afterwards every cluster with at least
     ``stream_settings['cluster_filter_threshold']`` documents contributes
     the keys of its ``documentsInCluster`` mapping to the evaluated list.

     Returns the result of ``Evaluation.getEvaluationMetrics`` called with
     ``noOfTweets``, the filtered clusters and the elapsed seconds.
     """
     clusterer = HDStreaminClustering(**stream_settings)

     began = time.time()
     tweetIterator = TwitterIterators.iterateFromFile(fileName)
     clusterer.cluster(tweetIterator)
     duration = time.time() - began

     largeEnough = []
     for _, entry in clusterer.clusters.iteritems():
         members = entry.documentsInCluster.keys()
         # Skip clusters below the configured size; the setting is read here
         # rather than up front so it is only required when clusters exist.
         if len(members) < stream_settings['cluster_filter_threshold']:
             continue
         largeEnough.append(members)

     return Evaluation.getEvaluationMetrics(noOfTweets, largeEnough,
                                            duration)