Example #1
0
    def test_aggregator_order_preserved(self):
        """Verify start() returns aggregator results in registration order.

        Interleaves several _CountMultiples aggregators (with distinct
        multipliers) around a CountsAggregator so that a reordering of
        results would be detected by the expected-value list.
        """
        batch_size = 10
        proc_count = 2
        total_rows = batch_size * proc_count * 10

        # One aggregator per multiplier, with a plain CountsAggregator
        # wedged in before the last one.
        aggs = [_CountMultiples(m) for m in (-1, 0, 0.5, 1)]
        aggs.append(ca.CountsAggregator())
        aggs.append(_CountMultiples(10))
        self.options.aggregators = aggs

        self.options.num_processes = proc_count
        self.options.num_rows = total_rows
        self.options.batch_size = batch_size

        self.worker = gw.Worker(self.options, self.dummy_logger,
                                self.dist_holder)

        actual = self.worker.start()
        expected = [
            -total_rows, 0, total_rows / 2, total_rows, total_rows,
            10 * total_rows
        ]

        self.assertListEqual(actual, expected)
Example #2
0
    def test_generate_aggregate_results(self):
        """A lone CountsAggregator must report num_rows regardless of
        whether one or two worker processes are used."""
        self.options.aggregators = [ca.CountsAggregator()]

        for proc_count in (1, 2):
            self.options.num_processes = proc_count

            self.worker = gw.Worker(self.options, self.dummy_logger,
                                    self.dist_holder)

            self.assertListEqual(self.worker.start(), [self.num_rows])
Example #3
0
    def test_aggregation_singleproc(self):
        """Counts aggregation with a single worker process.

        Generates num_rows rows in batches of batch_size and checks that
        the CountsAggregator's result equals the total row count.
        """
        # NOTE: removed unused local `aggregator_name` — it was never
        # referenced anywhere in the test body.
        batch_size = 10
        num_processes = 1

        self.options.num_processes = num_processes
        num_rows = batch_size * num_processes * 4

        self.options.aggregators = [ca.CountsAggregator()]

        self.options.num_rows = num_rows
        self.options.batch_size = batch_size

        self.worker = gw.Worker(self.options, self.dummy_logger,
                                self.dist_holder)

        x = self.worker.start()
        self.assertListEqual(x, [num_rows])
Example #4
0
    def test_aggregation_multiproc(self):
        """Counts aggregation across two worker processes.

        The per-process partial counts must combine into the full row
        count.
        """
        batch_size = 10
        proc_count = 2
        row_count = batch_size * proc_count * 2

        self.options.num_processes = proc_count
        self.options.aggregators = [ca.CountsAggregator()]
        self.options.num_rows = row_count
        self.options.batch_size = batch_size

        self.worker = gw.Worker(self.options, self.dummy_logger,
                                self.dist_holder)

        result = self.worker.start()
        self.assertListEqual(result, [row_count])
    def setUp(self):
        """Learn distributions from in-memory mock data files and build a
        DataGeneratorEngine for the tests to exercise."""
        # Seed the RNG and keep the seed around so failures can be
        # reproduced from the test output.
        self.seed = int(time.time())
        self.seed_msg = "Random seed used for this test: %s" % self.seed
        self.longMessage = True
        spar_random.seed(self.seed)

        opts = Options()
        opts.verbose = False
        self.learner_options = opts

        gen_opts = gw.DataGeneratorOptions()
        gen_opts.aggregators = [ca.CountsAggregator()]
        self.engine_options = gen_opts

        # Logger that discards everything — the learners require one.
        quiet_logger = logging.getLogger('dummy')
        quiet_logger.addHandler(logging.NullHandler())

        # Learn each distribution family from StringIO-wrapped mock data.
        pums_dict = learn_distributions.learn_pums_dists(
            opts, quiet_logger,
            [("mock pums",
              stringio.StringIO(mock_data_files.mock_pums_data))])

        names_dict = learn_distributions.learn_name_dists(
            opts, quiet_logger,
            [('male_first_names.txt',
              stringio.StringIO(mock_data_files.mock_male_first_names)),
             ('female_first_names.txt',
              stringio.StringIO(mock_data_files.mock_female_first_names)),
             ('last_names.txt',
              stringio.StringIO(mock_data_files.mock_last_names))])

        zipcode_dict = learn_distributions.learn_zipcode_dists(
            opts, quiet_logger,
            [('mock_zipcodes',
              stringio.StringIO(mock_data_files.mock_zipcodes))])

        text_engine = learn_distributions.train_text_engine(
            opts, quiet_logger,
            [('mock_text',
              stringio.StringIO(mock_data_files.mock_text_files))])

        address_dict = learn_distributions.learn_street_address_dists(
            opts, quiet_logger,
            [('mock street file',
              stringio.StringIO(mock_data_files.mock_street_names))])

        # Combine everything into one holder and build the engine under test.
        self.dist_holder = learn_distributions.make_distribution_holder(
            opts, quiet_logger, pums_dict, names_dict, zipcode_dict,
            address_dict, text_engine)
        self.data_generator_engine = \
            data_generator_engine.DataGeneratorEngine(gen_opts,
                                                      self.dist_holder)
    def setUp(self):
        """Give each test a fresh CountsAggregator instance."""
        self.aggregator = ca.CountsAggregator()