Beispiel #1
0
 def test_dict_aggr_threads(self):
     """Count words with thread workers and the strong sorted dict aggregator.

     The counted results are handed to ``self._check_results``.
     """
     counter = CounterProcessor(
         self.source_class,
         ThreadWorker,
         StrongSortedDictAggregator,
         SimpleSplitter,
     )
     # NOTE(review): 6 and 128 are presumably the concurrency level and the
     # read-buffer size -- confirm against count_words' signature.
     word_counts = counter.count_words(self._test_file, 6, 128, settings)
     self._check_results(list(word_counts))
Beispiel #2
0
 def test_string_source(self):
     """Count words from text held in memory instead of read from a file.

     The text is the first item produced by a ``FileSource`` opened on the
     test file; it is then processed through ``StringSource``.
     """
     total_size = FileSource.source_size(self._test_file)
     # The reported size is passed for both trailing arguments; presumably
     # this makes the source yield the whole file at once -- TODO confirm.
     with FileSource(self._test_file, 0, total_size, total_size) as source:
         text = source.next()
     counter = CounterProcessor(
         StringSource,
         SimpleWorker,
         RedisAggregator,
         RegexpSplitter,
     )
     word_counts = counter.count_words(text, 6, 128, settings)
     self._check_results(list(word_counts))
Beispiel #3
0
 def test_http_source(self):
     """Compare two counting runs over the same URL fetched via HTTPSource.

     The first run (arguments ``1`` and ``1024 * 1024``) serves as the
     baseline; the second run (arguments ``6`` and ``1024``) is checked
     against it through ``self._check_results``.
     """
     counter = CounterProcessor(
         HTTPSource,
         ProcessWorker,
         RedisAggregator,
         SimpleSplitter,
     )
     one_megabyte = 1024 * 1024
     # Baseline first, then the run under test -- same order as before.
     baseline = list(
         counter.count_words(self._test_url, 1, one_megabyte, settings))
     counted = list(counter.count_words(self._test_url, 6, 1024, settings))
     self._check_results(counted, baseline)
Beispiel #4
0
 def test_string_source(self):
     """Run the counter over the test file's content supplied as a string."""
     file_size = FileSource.source_size(self._test_file)
     file_source = FileSource(self._test_file, 0, file_size, file_size)
     with file_source as reader:
         # Take the first item the source produces (presumably the whole
         # file, given the size arguments -- TODO confirm).
         content = reader.next()
     splitter_class = RegexpSplitter
     counter = CounterProcessor(StringSource, SimpleWorker, RedisAggregator,
                                splitter_class)
     counted = list(counter.count_words(content, 6, 128, settings))
     self._check_results(counted)
Beispiel #5
0
 def test_remove_mode_redis(self):
     """Check remove mode with the Redis aggregator on two literal strings.

     A first pass counts one sentence; a second pass over another sentence
     runs with ``remove_mode=True``. Exactly three entries are expected
     afterwards: ``('v', 2.0)``, ``('nah', 1.0)`` and ``('k', 1.0)``.
     """
     counter = CounterProcessor(
         StringSource,
         SimpleWorker,
         RedisAggregator,
         RegexpSplitter,
     )
     # NOTE(review): the first pass's return value is intentionally unused
     # here, as in the original; this relies on count_words doing its work
     # when called -- confirm it is not lazy.
     counter.count_words('na v ot nah pod k za v na', 1, 128, settings)
     remaining = list(
         counter.count_words(
             'ot na pod za na ot na', 1, 128, settings, remove_mode=True))
     self.assertEqual(len(remaining), 3, 'Bad results length')
     expected_entries = (('v', 2.0), ('nah', 1.0), ('k', 1.0))
     for position, expected in enumerate(expected_entries):
         self.assertEqual(remaining[position], expected)
Beispiel #6
0
 def test_remove_mode_redis(self):
     """Verify the entries left after a remove-mode pass (Redis aggregator).

     The first sentence is counted normally, the second one with
     ``remove_mode=True``; the remaining entries must be ``v``, ``nah`` and
     ``k`` with counts 2.0, 1.0 and 1.0, in that order.
     """
     first_text = 'na v ot nah pod k za v na'
     second_text = 'ot na pod za na ot na'
     counter = CounterProcessor(StringSource, SimpleWorker, RedisAggregator,
                                RegexpSplitter)
     # The return value of the first pass is not used (as in the original).
     counter.count_words(first_text, 1, 128, settings)
     second_pass = counter.count_words(second_text, 1, 128, settings,
                                       remove_mode=True)
     leftovers = list(second_pass)
     self.assertEqual(len(leftovers), 3, 'Bad results length')
     self.assertEqual(leftovers[0], ('v', 2.0))
     self.assertEqual(leftovers[1], ('nah', 1.0))
     self.assertEqual(leftovers[2], ('k', 1.0))
    # Dispatch on the current option flag ``opt`` and its value ``arg``.
    # NOTE(review): the enclosing loop header is outside this excerpt;
    # presumably it iterates over parsed command-line options -- confirm.
    if opt in ("-h", "--help"):
        # Print the usage text and exit with the default status.
        print help_str
        sys.exit()
    elif opt == "-i":
        # First input path; used for the initial counting pass.
        path1 = arg
    elif opt == "-r":
        # Second path; when set, a second pass runs with remove_mode=True.
        path2 = arg
    elif opt == "-c":
        # Concurrency value passed to count_words (converted to int).
        concurrency = int(arg)
    elif opt == '-b':
        # Read-buffer value passed to count_words (converted to int).
        read_buffer = int(arg)
    elif opt == '-a':
        # Stored as the 'agg_size' entry of the aggregator settings.
        settings.aggregator['agg_size'] = int(arg)
    else:
        # Any other flag is reported and the script exits with status 2.
        print "Unknown command line argument: %s" % opt
        sys.exit(2)

# Build the processor from the configured source and aggregator classes,
# using process-based workers and the simple splitter.
processor = CounterProcessor(
    settings.source_class,
    ProcessWorker,
    settings.aggregator_class,
    SimpleSplitter,
)

# NOTE(review): only the count_words calls are timed, not the iteration over
# the results below -- confirm that count_words does its work eagerly.
start_time = time.time()
results = processor.count_words(path1, concurrency, read_buffer, settings)
if path2:
    # Second pass over path2 in remove mode, seeded with the aggregated data
    # held by the processor's aggregator after the first pass.
    results = processor.count_words(
        path2,
        concurrency,
        read_buffer,
        settings,
        agg_data=processor.aggregator.agg,
        remove_mode=True,
    )
result_time = time.time() - start_time

# Report every (name, value) pair, a blank line, then the elapsed time.
for name, value in results:
    print("%s: %s" % (name, value))
print("")
print("Processed in %s seconds" % result_time)
Beispiel #8
0
 def test_regexp_splitter(self):
     """Count words in the test file using ``RegexpSplitter``.

     The results are passed to ``self._check_results``.
     """
     counter = CounterProcessor(
         self.source_class,
         SimpleWorker,
         RedisAggregator,
         RegexpSplitter,
     )
     word_counts = counter.count_words(self._test_file, 6, 128, settings)
     self._check_results(list(word_counts))
Beispiel #9
0
 def test_concurrency_and_buffering(self):
     """Count the test file with arguments ``6`` and ``128`` and check it.

     NOTE(review): 6 and 128 are presumably the concurrency level and the
     read-buffer size -- confirm against count_words.
     """
     counter = CounterProcessor(
         self.source_class,
         SimpleWorker,
         RedisAggregator,
         SimpleSplitter,
     )
     word_counts = counter.count_words(self._test_file, 6, 128, settings)
     self._check_results(list(word_counts))
Beispiel #10
0
 def setUp(self):
     """Select ``FileSource`` and compute the baseline results.

     The baseline is one ``count_words`` run over the test file with the
     arguments ``1`` and ``1024 * 1024``; it is stored on
     ``self.base_results``.
     """
     self.source_class = FileSource
     baseline_counter = CounterProcessor(
         self.source_class,
         SimpleWorker,
         RedisAggregator,
         SimpleSplitter,
     )
     one_megabyte = 1024 * 1024
     baseline = baseline_counter.count_words(
         self._test_file, 1, one_megabyte, settings)
     self.base_results = list(baseline)
Beispiel #11
0
 def test_regexp_splitter(self):
     """Exercise the regexp-based splitter on the test file."""
     splitter_class = RegexpSplitter
     counter = CounterProcessor(self.source_class, SimpleWorker,
                                RedisAggregator, splitter_class)
     # Materialise the results before handing them to the checker.
     counted = list(
         counter.count_words(self._test_file, 6, 128, settings))
     self._check_results(counted)
Beispiel #12
0
 def test_http_source(self):
     """Check an HTTP-sourced run against a baseline run over the same URL."""
     counter = CounterProcessor(HTTPSource, ProcessWorker,
                                RedisAggregator, SimpleSplitter)
     url = self._test_url
     # Baseline: arguments 1 and 1024 * 1024.
     baseline_run = counter.count_words(url, 1, 1024 * 1024, settings)
     baseline = list(baseline_run)
     # Run under test: arguments 6 and 1024.
     checked_run = counter.count_words(url, 6, 1024, settings)
     counted = list(checked_run)
     self._check_results(counted, baseline)
Beispiel #13
0
 def test_concurrency_and_buffering(self):
     """Run the counter over the test file and check the results."""
     worker_class = SimpleWorker
     counter = CounterProcessor(self.source_class, worker_class,
                                RedisAggregator, SimpleSplitter)
     # Same positional arguments as the original call: 6, then 128.
     counted = list(
         counter.count_words(self._test_file, 6, 128, settings))
     self._check_results(counted)
Beispiel #14
0
 def setUp(self):
     """Prepare the fixture: source class plus baseline counting results."""
     self.source_class = FileSource
     counter = CounterProcessor(self.source_class, SimpleWorker,
                                RedisAggregator, SimpleSplitter)
     # Baseline run with arguments 1 and 1024 * 1024, kept for the tests.
     baseline_run = counter.count_words(
         self._test_file, 1, 1024 * 1024, settings)
     self.base_results = list(baseline_run)
Beispiel #15
0
 def test_dict_aggr_threads(self):
     """Combine ``ThreadWorker`` with ``StrongSortedDictAggregator``."""
     aggregator_class = StrongSortedDictAggregator
     counter = CounterProcessor(self.source_class, ThreadWorker,
                                aggregator_class, SimpleSplitter)
     # Collect the results into a list before passing them to the checker.
     counted = list(
         counter.count_words(self._test_file, 6, 128, settings))
     self._check_results(counted)