def make_dist_holder(self):
        """Learn all mock distributions and keep the resulting holder.

        Each ``learn_distributions`` learner is run over in-memory mock data
        (lists of ``(name, file-like object)`` pairs) with verbose output
        switched off.  The learned pieces are combined by
        ``make_distribution_holder`` and the result is stored as
        ``self.dist_holder``.  Reads ``self.dummy_logger``.
        """
        quiet_options = Options()
        quiet_options.verbose = False
        logger = self.dummy_logger

        pums_input = [
            ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
        ]
        learned_pums = learn_distributions.learn_pums_dists(
            quiet_options, logger, pums_input)

        # One mock file per name category, in the order the learner is given.
        names_input = [
            ('male_first_names.txt',
             stringio.StringIO(mock_data_files.mock_male_first_names)),
            ('female_first_names.txt',
             stringio.StringIO(mock_data_files.mock_female_first_names)),
            ('last_names.txt',
             stringio.StringIO(mock_data_files.mock_last_names)),
        ]
        learned_names = learn_distributions.learn_name_dists(
            quiet_options, logger, names_input)

        zipcode_input = [
            ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
        ]
        learned_zipcodes = learn_distributions.learn_zipcode_dists(
            quiet_options, logger, zipcode_input)

        text_input = [
            ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
        ]
        trained_text_engine = learn_distributions.train_text_engine(
            quiet_options, logger, text_input)

        streets_input = [
            ('mock street file',
             stringio.StringIO(mock_data_files.mock_street_names)),
        ]
        learned_addresses = learn_distributions.learn_street_address_dists(
            quiet_options, logger, streets_input)

        self.dist_holder = learn_distributions.make_distribution_holder(
            quiet_options,
            logger,
            learned_pums,
            learned_names,
            learned_zipcodes,
            learned_addresses,
            trained_text_engine)
    def setUp(self):
        """Seed the RNG and learn the mock zip-code distributions.

        Records the clock-derived seed in ``self.seed`` together with a
        message naming it (``self.seed_msg``), routes the 'dummy' logger at
        DEBUG level into the in-memory buffer ``self.log``, and stores the
        output of ``learn_zipcode_dists`` (run with verbose options) as
        ``self.zipcode_dict``.
        """
        self.seed = int(time.time())
        self.seed_msg = "Random seed used for this test: %s" % self.seed
        self.longMessage = True
        spar_random.seed(self.seed)

        zipcode_files = [
            ('mock_zipcodes',
             stringio.StringIO(mock_data_files.mock_zipcodes)),
        ]

        self.log = stringio.StringIO()
        dummy_logger = logging.getLogger('dummy')
        log_handler = logging.StreamHandler(self.log)
        dummy_logger.addHandler(log_handler)
        # logging.getLogger('dummy') hands back the same logger object for
        # every test, so a handler that is never detached stays attached for
        # the rest of the run: handlers (and their buffers) would pile up and
        # keep receiving every later test's DEBUG output.  Detach this test's
        # handler once the test is over (addCleanup comes from
        # unittest.TestCase).
        self.addCleanup(dummy_logger.removeHandler, log_handler)
        dummy_logger.setLevel(logging.DEBUG)

        learner_options = Options()
        learner_options.verbose = True

        self.zipcode_dict = learn_distributions.learn_zipcode_dists(
            learner_options, dummy_logger, zipcode_files)
Beispiel #3
0
    def setUp(self):
        """Build the mock distribution holder and a range-query generator.

        Seeds ``spar_random`` from the clock (the seed and a message naming
        it are kept on the instance), learns every mock distribution into
        ``self.dist_holder`` and creates ``self.generator`` as an
        ``rqg.RangeQueryGenerator`` over the first-name, zip-code, SSN and
        last-updated distributions.
        """
        seed = int(time.time())
        self.seed = seed
        self.seed_msg = "Random seed used for this test: %s" % seed
        self.longMessage = True
        spar_random.seed(seed)

        # Set up initialization values.
        class Object(object):
            pass

        options = Object()
        logger = logging.getLogger('dummy')
        logger.addHandler(logging.NullHandler())
        self.learner_options = options
        self.dummy_logger = logger
        self.dummy_object = Object()

        # Every learner is fed a list of (name, file-like object) pairs.
        pums_dict = learn_distributions.learn_pums_dists(
            options,
            logger,
            [("mock pums",
              stringio.StringIO(mock_data_files.mock_pums_data))])
        names_dict = learn_distributions.learn_name_dists(
            options,
            logger,
            [('male_first_names.txt',
              stringio.StringIO(mock_data_files.mock_male_first_names)),
             ('female_first_names.txt',
              stringio.StringIO(mock_data_files.mock_female_first_names)),
             ('last_names.txt',
              stringio.StringIO(mock_data_files.mock_last_names))])
        zipcode_dict = learn_distributions.learn_zipcode_dists(
            options,
            logger,
            [('mock_zipcodes',
              stringio.StringIO(mock_data_files.mock_zipcodes))])
        text_engine = learn_distributions.train_text_engine(
            options,
            logger,
            [('mock_text',
              stringio.StringIO(mock_data_files.mock_text_files))])
        address_dict = learn_distributions.learn_street_address_dists(
            options,
            logger,
            [('mock street file',
              stringio.StringIO(mock_data_files.mock_street_names))])

        self.dist_holder = learn_distributions.make_distribution_holder(
            options, logger, pums_dict, names_dict, zipcode_dict,
            address_dict, text_engine)

        self.fields_to_gen = [
            sv.VARS.SEX,
            sv.VARS.FIRST_NAME,
            sv.VARS.CITIZENSHIP,
            sv.VARS.RACE,
            sv.VARS.STATE,
            sv.VARS.ZIP_CODE,
            sv.VARS.AGE,
            sv.VARS.DOB,
            sv.VARS.SSN,
            sv.VARS.LAST_UPDATED,
        ]

        fields = [
            sv.VARS.FIRST_NAME,
            sv.VARS.ZIP_CODE,
            sv.VARS.SSN,
            sv.VARS.LAST_UPDATED,
        ]
        # One distribution per queried field, in the same order as `fields`.
        learned = self.dist_holder.dist_dict
        dists = [learned[field] for field in fields]

        other_fields = ['no_queries', 'r_lower', 'r_upper', 'type']
        # Same numeric settings for each of the three 'type' values.
        other_cols = [[3, 10, 100, query_type]
                      for query_type in ('less', 'range', 'greater')]
        self.generator = rqg.RangeQueryGenerator(
            "P2", 'range', ["LL"], dists, fields, 1000, 100,
            other_fields, other_cols)
    def setUp(self):
        """Create a data-generator engine that uses a counts aggregator.

        Seeds ``spar_random`` from the clock, builds non-verbose learner
        options and engine options whose only aggregator is a
        ``ca.CountsAggregator``, learns the mock distributions into
        ``self.dist_holder`` and wraps everything in
        ``self.data_generator_engine``.
        """
        self.seed = int(time.time())
        self.seed_msg = "Random seed used for this test: %s" % self.seed
        self.longMessage = True
        spar_random.seed(self.seed)

        options = Options()
        options.verbose = False
        self.learner_options = options

        engine_opts = gw.DataGeneratorOptions()
        engine_opts.aggregators = [ca.CountsAggregator()]
        self.engine_options = engine_opts

        logger = logging.getLogger('dummy')
        logger.addHandler(logging.NullHandler())

        # Short alias for wrapping mock text in a file-like object.
        as_file = stringio.StringIO

        pums_input = [("mock pums", as_file(mock_data_files.mock_pums_data))]
        pums_dict = learn_distributions.learn_pums_dists(
            options, logger, pums_input)

        names_input = [
            ('male_first_names.txt',
             as_file(mock_data_files.mock_male_first_names)),
            ('female_first_names.txt',
             as_file(mock_data_files.mock_female_first_names)),
            ('last_names.txt',
             as_file(mock_data_files.mock_last_names)),
        ]
        names_dict = learn_distributions.learn_name_dists(
            options, logger, names_input)

        zipcode_input = [
            ('mock_zipcodes', as_file(mock_data_files.mock_zipcodes)),
        ]
        zipcode_dict = learn_distributions.learn_zipcode_dists(
            options, logger, zipcode_input)

        text_input = [('mock_text', as_file(mock_data_files.mock_text_files))]
        text_engine = learn_distributions.train_text_engine(
            options, logger, text_input)

        streets_input = [
            ('mock street file', as_file(mock_data_files.mock_street_names)),
        ]
        address_dict = learn_distributions.learn_street_address_dists(
            options, logger, streets_input)

        holder = learn_distributions.make_distribution_holder(
            options, logger, pums_dict, names_dict, zipcode_dict,
            address_dict, text_engine)
        self.dist_holder = holder
        self.data_generator_engine = data_generator_engine.DataGeneratorEngine(
            engine_opts, holder)
Beispiel #5
0
    def setUp(self):
        """Build the mock distribution holder and an equality-query generator.

        Seeds ``spar_random`` from the clock, learns every mock distribution
        into ``self.dist_holder`` and creates ``self.generator`` as an
        ``eqg.EqualityQueryGenerator`` over the first-name and last-name
        distributions.
        """
        seed = int(time.time())
        self.seed = seed
        self.seed_msg = "Random seed used for this test: %s" % seed
        self.longMessage = True
        spar_random.seed(seed)

        # Set up initialization values.
        class Object(object):
            pass

        self.learner_options = Object()
        self.dummy_logger = logging.getLogger('dummy')
        self.dummy_logger.addHandler(logging.NullHandler())
        self.dummy_object = Object()

        # Arguments shared by every learner call below.
        common = (self.learner_options, self.dummy_logger)

        pums_files = [
            ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
        ]
        pums_dict = learn_distributions.learn_pums_dists(
            common[0], common[1], pums_files)

        names_files = [
            ('male_first_names.txt',
             stringio.StringIO(mock_data_files.mock_male_first_names)),
            ('female_first_names.txt',
             stringio.StringIO(mock_data_files.mock_female_first_names)),
            ('last_names.txt',
             stringio.StringIO(mock_data_files.mock_last_names)),
        ]
        names_dict = learn_distributions.learn_name_dists(
            common[0], common[1], names_files)

        zipcode_files = [
            ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
        ]
        zipcode_dict = learn_distributions.learn_zipcode_dists(
            common[0], common[1], zipcode_files)

        text_files = [
            ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
        ]
        text_engine = learn_distributions.train_text_engine(
            common[0], common[1], text_files)

        streets_files = [
            ('mock street file',
             stringio.StringIO(mock_data_files.mock_street_names)),
        ]
        address_dict = learn_distributions.learn_street_address_dists(
            common[0], common[1], streets_files)

        self.dist_holder = learn_distributions.make_distribution_holder(
            common[0], common[1], pums_dict, names_dict, zipcode_dict,
            address_dict, text_engine)

        self.fields_to_gen = [
            sv.VARS.SEX,
            sv.VARS.FIRST_NAME,
            sv.VARS.LAST_NAME,
        ]

        # Queried fields are looked up through their SQL column names.
        fields = [sv.sql_name_to_enum(sql_name)
                  for sql_name in ('fname', 'lname')]
        learned = self.dist_holder.dist_dict
        name_dists = [learned[sv.VARS.FIRST_NAME], learned[sv.VARS.LAST_NAME]]

        other_fields = ['no_queries', 'r_lower', 'r_upper']
        other_cols = [
            [5, 1, 10],
            [5, 11, 100],
        ]
        self.generator = eqg.EqualityQueryGenerator(
            'EQ', 'eq', ["LL"], name_dists, fields, 1000, 100,
            other_fields, other_cols)
    def setUp(self):
        """Create a data-generator engine whose aggregator needs every field.

        Builds non-verbose learner options, engine options holding a single
        ``GenerateEverything`` aggregator, learns the mock distributions into
        ``self.dist_holder`` and stores the resulting engine as
        ``self.data_generator_engine``.
        """
        options = Options()
        options.verbose = False
        self.learner_options = options

        # Aggregator stub: it asks for all of sv.VAR_GENERATION_ORDER and
        # otherwise does nothing (map and reduce both return None).
        class GenerateEverything(object):
            def start(self):
                pass

            def fields_needed(self):
                return sv.VAR_GENERATION_ORDER

            def map(self, row_dict):
                return None

            @staticmethod
            def reduce(r1, r2):
                return None

            def done(self):
                pass

        engine_opts = gw.DataGeneratorOptions()
        engine_opts.aggregators = [GenerateEverything()]
        self.engine_options = engine_opts

        logger = logging.getLogger('dummy')
        logger.addHandler(logging.NullHandler())

        pums_dict = learn_distributions.learn_pums_dists(
            options,
            logger,
            [("mock pums",
              stringio.StringIO(mock_data_files.mock_pums_data))])

        names_dict = learn_distributions.learn_name_dists(
            options,
            logger,
            [('male_first_names.txt',
              stringio.StringIO(mock_data_files.mock_male_first_names)),
             ('female_first_names.txt',
              stringio.StringIO(mock_data_files.mock_female_first_names)),
             ('last_names.txt',
              stringio.StringIO(mock_data_files.mock_last_names))])

        zipcode_dict = learn_distributions.learn_zipcode_dists(
            options,
            logger,
            [('mock_zipcodes',
              stringio.StringIO(mock_data_files.mock_zipcodes))])

        text_engine = learn_distributions.train_text_engine(
            options,
            logger,
            [('mock_text',
              stringio.StringIO(mock_data_files.mock_text_files))])

        address_dict = learn_distributions.learn_street_address_dists(
            options,
            logger,
            [('mock street file',
              stringio.StringIO(mock_data_files.mock_street_names))])

        holder = learn_distributions.make_distribution_holder(
            options, logger, pums_dict, names_dict, zipcode_dict,
            address_dict, text_engine)
        self.dist_holder = holder
        self.data_generator_engine = data_generator_engine.DataGeneratorEngine(
            engine_opts, holder)
Beispiel #7
0
    def setUp(self):
        """Build the mock distribution holder and an AND-query generator.

        Seeds ``spar_random`` from the clock, learns every mock distribution
        into ``self.dist_holder`` and creates ``self.generator`` as an
        ``aqg.AndQueryGenerator`` that is handed all values and keys of the
        holder's ``dist_dict``.
        """
        seed = int(time.time())
        self.seed = seed
        self.seed_msg = "Random seed used for this test: %s" % seed
        self.longMessage = True
        spar_random.seed(seed)

        # Set up initialization values.
        class Object(object):
            pass

        options = Object()
        self.learner_options = options
        logger = logging.getLogger('dummy')
        self.dummy_logger = logger
        logger.addHandler(logging.NullHandler())
        self.dummy_object = Object()

        # Short alias for wrapping mock text in a file-like object.
        wrap = stringio.StringIO

        pums_dict = learn_distributions.learn_pums_dists(
            options, logger,
            [("mock pums", wrap(mock_data_files.mock_pums_data))])
        names_dict = learn_distributions.learn_name_dists(
            options, logger,
            [('male_first_names.txt',
              wrap(mock_data_files.mock_male_first_names)),
             ('female_first_names.txt',
              wrap(mock_data_files.mock_female_first_names)),
             ('last_names.txt',
              wrap(mock_data_files.mock_last_names))])
        zipcode_dict = learn_distributions.learn_zipcode_dists(
            options, logger,
            [('mock_zipcodes', wrap(mock_data_files.mock_zipcodes))])
        text_engine = learn_distributions.train_text_engine(
            options, logger,
            [('mock_text', wrap(mock_data_files.mock_text_files))])
        address_dict = learn_distributions.learn_street_address_dists(
            options, logger,
            [('mock street file', wrap(mock_data_files.mock_street_names))])

        self.dist_holder = learn_distributions.make_distribution_holder(
            options, logger, pums_dict, names_dict, zipcode_dict,
            address_dict, text_engine)

        self.fields_to_gen = [
            sv.VARS.SEX,
            sv.VARS.FOO,
            sv.VARS.LAST_NAME,
            sv.VARS.CITIZENSHIP,
            sv.VARS.AGE,
            sv.VARS.INCOME,
            sv.VARS.RACE,
            sv.VARS.STATE,
            sv.VARS.WEEKS_WORKED,
            sv.VARS.HOURS_WORKED,
            sv.VARS.MILITARY_SERVICE,
            sv.VARS.MARITAL_STATUS,
            sv.VARS.GRADE_ENROLLED,
            sv.VARS.LANGUAGE,
            sv.VARS.FIRST_NAME,
            sv.VARS.ZIP_CODE,
            sv.VARS.CITY,
            sv.VARS.STREET_ADDRESS,
            sv.VARS.DOB,
        ]

        other_fields = ['no_queries', 'r_lower', 'r_upper',
                        'num_clauses', 'tm_lower', 'tm_upper']
        other_cols = [
            [5, 10, 100, 2, 100, 1000],
            [5, 1, 10, 2, 10, 100],
        ]
        learned = self.dist_holder.dist_dict
        self.generator = aqg.AndQueryGenerator(
            "P1", 'and-eq', ["LL"],
            learned.values(), learned.keys(),
            1000, 100, other_fields, other_cols)
Beispiel #8
0
    def setUp(self):
        """Build the mock distribution holder and an alarm-query generator.

        Seeds ``spar_random`` from the clock, learns every mock distribution
        into ``self.dist_holder`` and creates ``self.generator`` as an
        ``aqg.AlarmQueryGenerator`` over the ``NOTES2`` distribution only.
        """
        self.seed = int(time.time())
        self.seed_msg = "Random seed used for this test: %s" % self.seed
        self.longMessage = True
        spar_random.seed(self.seed)

        # Set up initialization values.
        class Object(object):
            pass

        self.learner_options = Object()
        self.dummy_logger = logging.getLogger('dummy')
        self.dummy_logger.addHandler(logging.NullHandler())
        self.dummy_object = Object()

        opts = self.learner_options
        log = self.dummy_logger

        pums_src = [
            ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
        ]
        pums_dict = learn_distributions.learn_pums_dists(opts, log, pums_src)

        names_src = [
            ('male_first_names.txt',
             stringio.StringIO(mock_data_files.mock_male_first_names)),
            ('female_first_names.txt',
             stringio.StringIO(mock_data_files.mock_female_first_names)),
            ('last_names.txt',
             stringio.StringIO(mock_data_files.mock_last_names)),
        ]
        names_dict = learn_distributions.learn_name_dists(opts, log, names_src)

        zipcode_src = [
            ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
        ]
        zipcode_dict = learn_distributions.learn_zipcode_dists(
            opts, log, zipcode_src)

        text_src = [
            ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
        ]
        text_engine = learn_distributions.train_text_engine(
            opts, log, text_src)

        streets_src = [
            ('mock street file',
             stringio.StringIO(mock_data_files.mock_street_names)),
        ]
        address_dict = learn_distributions.learn_street_address_dists(
            opts, log, streets_src)

        self.dist_holder = learn_distributions.make_distribution_holder(
            opts, log, pums_dict, names_dict, zipcode_dict, address_dict,
            text_engine)

        # The generated fields double as the generator's field list.
        self.fields_to_gen = [sv.VARS.NOTES2]
        notes_dists = [self.dist_holder.dist_dict[sv.VARS.NOTES2]]
        other_fields = ['no_queries', 'r_lower', 'r_upper', 'distance']
        other_cols = [
            [5, 1, 10, 50],
            [5, 1, 10, 50],
        ]
        self.generator = aqg.AlarmQueryGenerator(
            "P9", 'alarm', ["LL"], notes_dists, self.fields_to_gen,
            1000, 100, other_fields, other_cols)
Beispiel #9
0
    def setUp(self):
        """Create a ``gw.Worker`` backed by the mock distribution holder.

        Seeds ``spar_random`` from the clock, builds generator options
        (100 rows, 2 processes, batch size 5, no aggregators, seeded with
        the same value), learns every mock distribution into
        ``self.dist_holder`` and stores the worker as ``self.worker``.
        """
        seed = int(time.time())
        self.seed = seed
        self.seed_msg = "Random seed used for this test: %s" % seed
        self.longMessage = True
        spar_random.seed(seed)

        class Object(object):
            pass

        logger = logging.getLogger('dummy')
        logger.addHandler(logging.NullHandler())
        self.dummy_logger = logger
        self.dummy_object = Object()

        self.num_rows = 100
        self.options = gw.DataGeneratorOptions(
            random_seed=self.seed,
            num_processes=2,
            num_rows=self.num_rows,
            verbose=False,
            aggregators=[],
            batch_size=5)

        # Build the distribution-holder from the in-memory mock files.
        learner_opts = Object()

        pums_dict = learn_distributions.learn_pums_dists(
            learner_opts, logger,
            [("mock pums",
              stringio.StringIO(mock_data_files.mock_pums_data))])

        names_dict = learn_distributions.learn_name_dists(
            learner_opts, logger,
            [('male_first_names.txt',
              stringio.StringIO(mock_data_files.mock_male_first_names)),
             ('female_first_names.txt',
              stringio.StringIO(mock_data_files.mock_female_first_names)),
             ('last_names.txt',
              stringio.StringIO(mock_data_files.mock_last_names))])

        zipcode_dict = learn_distributions.learn_zipcode_dists(
            learner_opts, logger,
            [('mock_zipcodes',
              stringio.StringIO(mock_data_files.mock_zipcodes))])

        text_engine = learn_distributions.train_text_engine(
            learner_opts, logger,
            [('mock_text',
              stringio.StringIO(mock_data_files.mock_text_files))])

        address_dict = learn_distributions.learn_street_address_dists(
            learner_opts, logger,
            [('mock street file',
              stringio.StringIO(mock_data_files.mock_street_names))])

        self.dist_holder = learn_distributions.make_distribution_holder(
            learner_opts, logger, pums_dict, names_dict, zipcode_dict,
            address_dict, text_engine)

        self.worker = gw.Worker(self.options, logger, self.dist_holder)
Beispiel #10
0
    def setUp(self):
        """Seed the RNG and build the mock distribution holder.

        Seeds ``spar_random`` with the current clock time (the seed and a
        message naming it are kept on the instance), attaches a
        ``NullHandler`` to the 'dummy' logger, runs every
        ``learn_distributions`` learner over in-memory mock data files and
        stores the combined result as ``self.dist_holder``.
        """

        self.seed = int(time.time())
        self.seed_msg = "Random seed used for this test: %s" % self.seed
        self.longMessage = True
        spar_random.seed(self.seed)

        # Bare attribute container, used for the learner options and as a
        # general-purpose dummy object.
        class Object(object):
            pass

        self.dummy_logger = logging.getLogger('dummy')
        self.dummy_logger.addHandler(logging.NullHandler())
        self.dummy_object = Object()

        # Build the distribution-holder
        learner_options = Object()

        # Each learner receives a list of (name, file-like object) pairs.
        pums_files = \
            [("mock pums",
              stringio.StringIO(mock_data_files.mock_pums_data))]
        pums_dict = \
            learn_distributions.learn_pums_dists(learner_options,
                                                 self.dummy_logger,
                                                 pums_files)

        names_files = \
            [('male_first_names.txt',
              stringio.StringIO(mock_data_files.mock_male_first_names)),
             ('female_first_names.txt',
              stringio.StringIO(mock_data_files.mock_female_first_names)),
             ('last_names.txt',
              stringio.StringIO(mock_data_files.mock_last_names))]
        names_dict = \
            learn_distributions.learn_name_dists(learner_options,
                                                 self.dummy_logger,
                                                 names_files)

        zipcode_files = \
            [('mock_zipcodes',
              stringio.StringIO(mock_data_files.mock_zipcodes))]
        zipcode_dict = \
            learn_distributions.learn_zipcode_dists(learner_options,
                                                    self.dummy_logger,
                                                    zipcode_files)

        text_files = \
            [('mock_text',
              stringio.StringIO(mock_data_files.mock_text_files))]
        text_engine = \
            learn_distributions.train_text_engine(learner_options,
                                                  self.dummy_logger,
                                                  text_files)
        streets_files = \
            [('mock street file',
              stringio.StringIO(mock_data_files.mock_street_names))]
        address_dict = \
                learn_distributions.learn_street_address_dists(learner_options,
                                                               self.dummy_logger,
                                                               streets_files)



        # Combine the learned pieces into the holder used by the tests.
        self.dist_holder = \
            learn_distributions.make_distribution_holder(learner_options,
                                                         self.dummy_logger,
                                                         pums_dict,
                                                         names_dict,
                                                         zipcode_dict,
                                                         address_dict,
                                                         text_engine)