def make_dist_holder(self):
    """Learn every mock distribution and store the holder on the test.

    Feeds the in-memory fixtures from ``mock_data_files`` through the
    ``learn_distributions`` entry points (PUMS, names, zipcodes, text
    engine, street addresses, in that order) and assigns the combined
    result to ``self.dist_holder``. Nothing is returned.

    Reads ``self.dummy_logger``, which must be set before this is called.
    """
    opts = Options()
    opts.verbose = False
    log = self.dummy_logger

    def in_memory(contents):
        # Wrap a fixture string in a file-like object.
        return stringio.StringIO(contents)

    # The learners are invoked in the same order as before; each receives
    # a list of (label, file-like object) pairs.
    pums = learn_distributions.learn_pums_dists(
        opts, log,
        [("mock pums", in_memory(mock_data_files.mock_pums_data))])

    names = learn_distributions.learn_name_dists(
        opts, log,
        [('male_first_names.txt',
          in_memory(mock_data_files.mock_male_first_names)),
         ('female_first_names.txt',
          in_memory(mock_data_files.mock_female_first_names)),
         ('last_names.txt',
          in_memory(mock_data_files.mock_last_names))])

    zipcodes = learn_distributions.learn_zipcode_dists(
        opts, log,
        [('mock_zipcodes', in_memory(mock_data_files.mock_zipcodes))])

    engine = learn_distributions.train_text_engine(
        opts, log,
        [('mock_text', in_memory(mock_data_files.mock_text_files))])

    addresses = learn_distributions.learn_street_address_dists(
        opts, log,
        [('mock street file',
          in_memory(mock_data_files.mock_street_names))])

    self.dist_holder = learn_distributions.make_distribution_holder(
        opts, log, pums, names, zipcodes, addresses, engine)
def setUp(self):
    """Seed the RNG, learn the mock distributions, build a range generator.

    Sets ``seed``, ``seed_msg``, ``learner_options``, ``dummy_logger``,
    ``dummy_object``, ``dist_holder``, ``fields_to_gen`` and ``generator``
    on the test instance.
    """
    # Seed from the wall clock; seed_msg is presumably passed as the msg of
    # assertions so a failure shows which seed to replay. longMessage makes
    # unittest append such a msg to the default failure text.
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    # Set up initialization values.
    class Object(object):
        pass

    self.learner_options = Object()
    self.dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in self.dummy_logger.handlers):
        self.dummy_logger.addHandler(logging.NullHandler())
    self.dummy_object = Object()

    # Learn each distribution from the in-memory mock files. Every learner
    # takes a list of (label, file-like object) pairs.
    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        self.learner_options, self.dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        self.learner_options, self.dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        self.learner_options, self.dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        self.learner_options, self.dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        self.learner_options, self.dummy_logger, streets_files)

    self.dist_holder = learn_distributions.make_distribution_holder(
        self.learner_options, self.dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)

    self.fields_to_gen = [
        sv.VARS.SEX, sv.VARS.FIRST_NAME, sv.VARS.CITIZENSHIP,
        sv.VARS.RACE, sv.VARS.STATE, sv.VARS.ZIP_CODE, sv.VARS.AGE,
        sv.VARS.DOB, sv.VARS.SSN, sv.VARS.LAST_UPDATED,
    ]

    # Fields handed to the generator, and the distribution for each one in
    # the same order.
    fields = [
        sv.VARS.FIRST_NAME, sv.VARS.ZIP_CODE, sv.VARS.SSN,
        sv.VARS.LAST_UPDATED,
    ]
    dist_dict = self.dist_holder.dist_dict
    dists = [dist_dict[field] for field in fields]

    # Each row of other_cols presumably supplies one value per name in
    # other_fields -- confirm against RangeQueryGenerator.
    other_fields = ['no_queries', 'r_lower', 'r_upper', 'type']
    other_cols = [
        [3, 10, 100, 'less'],
        [3, 10, 100, 'range'],
        [3, 10, 100, 'greater'],
    ]
    self.generator = rqg.RangeQueryGenerator(
        "P2", 'range', ["LL"], dists, fields, 1000, 100,
        other_fields, other_cols)
def setUp(self):
    """Seed the RNG and build a data-generator engine over mock data.

    Sets ``seed``, ``seed_msg``, ``learner_options``, ``engine_options``,
    ``dist_holder`` and ``data_generator_engine`` on the test instance.
    The engine options carry a single ``CountsAggregator``.
    """
    # Seed from the wall clock; seed_msg is presumably passed as the msg of
    # assertions so a failure shows which seed to replay. longMessage makes
    # unittest append such a msg to the default failure text.
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    learner_options = Options()
    learner_options.verbose = False
    self.learner_options = learner_options

    engine_options = gw.DataGeneratorOptions()
    counts_agg = ca.CountsAggregator()
    engine_options.aggregators = [counts_agg]
    self.engine_options = engine_options

    dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in dummy_logger.handlers):
        dummy_logger.addHandler(logging.NullHandler())

    # Learn each distribution from the in-memory mock files. Every learner
    # takes a list of (label, file-like object) pairs.
    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        learner_options, dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        learner_options, dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        learner_options, dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        learner_options, dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        learner_options, dummy_logger, streets_files)

    dist_holder = learn_distributions.make_distribution_holder(
        learner_options, dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)
    self.dist_holder = dist_holder

    self.data_generator_engine = data_generator_engine.DataGeneratorEngine(
        engine_options, dist_holder)
def setUp(self):
    """Seed the RNG, learn the mock distributions, build an equality generator.

    Sets ``seed``, ``seed_msg``, ``learner_options``, ``dummy_logger``,
    ``dummy_object``, ``dist_holder``, ``fields_to_gen`` and ``generator``
    on the test instance.
    """
    # Seed from the wall clock; seed_msg is presumably passed as the msg of
    # assertions so a failure shows which seed to replay. longMessage makes
    # unittest append such a msg to the default failure text.
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    # Set up initialization values.
    class Object(object):
        pass

    self.learner_options = Object()
    self.dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in self.dummy_logger.handlers):
        self.dummy_logger.addHandler(logging.NullHandler())
    self.dummy_object = Object()

    # Learn each distribution from the in-memory mock files. Every learner
    # takes a list of (label, file-like object) pairs.
    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        self.learner_options, self.dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        self.learner_options, self.dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        self.learner_options, self.dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        self.learner_options, self.dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        self.learner_options, self.dummy_logger, streets_files)

    self.dist_holder = learn_distributions.make_distribution_holder(
        self.learner_options, self.dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)

    self.fields_to_gen = [
        sv.VARS.SEX, sv.VARS.FIRST_NAME, sv.VARS.LAST_NAME,
    ]

    sub_cat = 'eq'
    # Fields are looked up by SQL column name, while the distributions are
    # keyed by the sv.VARS members.
    fields = [sv.sql_name_to_enum('fname'), sv.sql_name_to_enum('lname')]
    dists1 = [
        self.dist_holder.dist_dict[sv.VARS.FIRST_NAME],
        self.dist_holder.dist_dict[sv.VARS.LAST_NAME],
    ]

    # Each row of other_cols presumably supplies one value per name in
    # other_fields -- confirm against EqualityQueryGenerator.
    other_fields = ['no_queries', 'r_lower', 'r_upper']
    other_cols = [[5, 1, 10], [5, 11, 100]]
    self.generator = eqg.EqualityQueryGenerator(
        'EQ', sub_cat, ["LL"], dists1, fields, 1000, 100,
        other_fields, other_cols)
def setUp(self):
    """Build a data-generator engine whose aggregator requests every field.

    Sets ``learner_options``, ``engine_options``, ``dist_holder`` and
    ``data_generator_engine`` on the test instance. Unlike most fixtures of
    this shape, no random seed is set here.
    """
    learner_options = Options()
    learner_options.verbose = False
    self.learner_options = learner_options

    class GenerateEverything(object):
        """Stub aggregator: asks for all fields and aggregates nothing."""

        def fields_needed(self):
            return sv.VAR_GENERATION_ORDER

        def map(self, row_dict):
            return None

        @staticmethod
        def reduce(r1, r2):
            return None

        def done(self):
            pass

        def start(self):
            pass

    engine_options = gw.DataGeneratorOptions()
    engine_options.aggregators = [GenerateEverything()]
    self.engine_options = engine_options

    dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in dummy_logger.handlers):
        dummy_logger.addHandler(logging.NullHandler())

    # Learn each distribution from the in-memory mock files. Every learner
    # takes a list of (label, file-like object) pairs.
    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        learner_options, dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        learner_options, dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        learner_options, dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        learner_options, dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        learner_options, dummy_logger, streets_files)

    dist_holder = learn_distributions.make_distribution_holder(
        learner_options, dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)
    self.dist_holder = dist_holder

    self.data_generator_engine = data_generator_engine.DataGeneratorEngine(
        engine_options, dist_holder)
def setUp(self):
    """Seed the RNG, learn the mock distributions, build an AND generator.

    Sets ``seed``, ``seed_msg``, ``learner_options``, ``dummy_logger``,
    ``dummy_object``, ``dist_holder``, ``fields_to_gen`` and ``generator``
    on the test instance. The generator receives every distribution in the
    holder, not just those listed in ``fields_to_gen``.
    """
    # Seed from the wall clock; seed_msg is presumably passed as the msg of
    # assertions so a failure shows which seed to replay. longMessage makes
    # unittest append such a msg to the default failure text.
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    # Set up initialization values.
    class Object(object):
        pass

    self.learner_options = Object()
    self.dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in self.dummy_logger.handlers):
        self.dummy_logger.addHandler(logging.NullHandler())
    self.dummy_object = Object()

    # Learn each distribution from the in-memory mock files. Every learner
    # takes a list of (label, file-like object) pairs.
    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        self.learner_options, self.dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        self.learner_options, self.dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        self.learner_options, self.dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        self.learner_options, self.dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        self.learner_options, self.dummy_logger, streets_files)

    self.dist_holder = learn_distributions.make_distribution_holder(
        self.learner_options, self.dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)

    self.fields_to_gen = [
        sv.VARS.SEX, sv.VARS.FOO, sv.VARS.LAST_NAME,
        sv.VARS.CITIZENSHIP, sv.VARS.AGE, sv.VARS.INCOME, sv.VARS.RACE,
        sv.VARS.STATE, sv.VARS.WEEKS_WORKED, sv.VARS.HOURS_WORKED,
        sv.VARS.MILITARY_SERVICE, sv.VARS.MARITAL_STATUS,
        sv.VARS.GRADE_ENROLLED, sv.VARS.LANGUAGE, sv.VARS.FIRST_NAME,
        sv.VARS.ZIP_CODE, sv.VARS.CITY, sv.VARS.STREET_ADDRESS,
        sv.VARS.DOB,
    ]

    # Each row of other_cols presumably supplies one value per name in
    # other_fields -- confirm against AndQueryGenerator.
    other_fields = [
        'no_queries', 'r_lower', 'r_upper', 'num_clauses', 'tm_lower',
        'tm_upper',
    ]
    other_cols = [[5, 10, 100, 2, 100, 1000], [5, 1, 10, 2, 10, 100]]

    # values() and keys() are taken from the same unmodified dict, so the
    # two sequences line up position by position.
    self.generator = aqg.AndQueryGenerator(
        "P1", 'and-eq', ["LL"],
        self.dist_holder.dist_dict.values(),
        self.dist_holder.dist_dict.keys(),
        1000, 100, other_fields, other_cols)
def setUp(self):
    """Seed the RNG, learn the mock distributions, build an alarm generator.

    Sets ``seed``, ``seed_msg``, ``learner_options``, ``dummy_logger``,
    ``dummy_object``, ``dist_holder``, ``fields_to_gen`` and ``generator``
    on the test instance. Only the ``NOTES2`` field and its distribution
    are handed to the generator.
    """
    # Seed from the wall clock; seed_msg is presumably passed as the msg of
    # assertions so a failure shows which seed to replay. longMessage makes
    # unittest append such a msg to the default failure text.
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    # Set up initialization values.
    class Object(object):
        pass

    self.learner_options = Object()
    self.dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in self.dummy_logger.handlers):
        self.dummy_logger.addHandler(logging.NullHandler())
    self.dummy_object = Object()

    # Learn each distribution from the in-memory mock files. Every learner
    # takes a list of (label, file-like object) pairs.
    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        self.learner_options, self.dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        self.learner_options, self.dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        self.learner_options, self.dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        self.learner_options, self.dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        self.learner_options, self.dummy_logger, streets_files)

    self.dist_holder = learn_distributions.make_distribution_holder(
        self.learner_options, self.dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)

    self.fields_to_gen = [sv.VARS.NOTES2]

    # Each row of other_cols presumably supplies one value per name in
    # other_fields -- confirm against AlarmQueryGenerator.
    other_fields = ['no_queries', 'r_lower', 'r_upper', 'distance']
    other_cols = [[5, 1, 10, 50], [5, 1, 10, 50]]
    self.generator = aqg.AlarmQueryGenerator(
        "P9", 'alarm', ["LL"],
        [self.dist_holder.dist_dict[sv.VARS.NOTES2]],
        self.fields_to_gen, 1000, 100, other_fields, other_cols)
def setUp(self):
    """Seed the RNG, learn the mock distributions, build a generator worker.

    Sets ``seed``, ``seed_msg``, ``dummy_logger``, ``dummy_object``,
    ``num_rows``, ``options``, ``dist_holder`` and ``worker`` on the test
    instance.
    """
    # Seed from the wall clock; seed_msg is presumably passed as the msg of
    # assertions so a failure shows which seed to replay. longMessage makes
    # unittest append such a msg to the default failure text.
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    class Object(object):
        pass

    self.dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in self.dummy_logger.handlers):
        self.dummy_logger.addHandler(logging.NullHandler())
    self.dummy_object = Object()

    self.num_rows = 100
    # The generator options reuse the same seed that was fed to spar_random.
    self.options = gw.DataGeneratorOptions(
        random_seed=self.seed,
        num_processes=2,
        num_rows=self.num_rows,
        verbose=False,
        aggregators=[],
        batch_size=5)

    # Build the distribution-holder. Every learner takes a list of
    # (label, file-like object) pairs.
    learner_options = Object()

    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        learner_options, self.dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        learner_options, self.dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        learner_options, self.dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        learner_options, self.dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        learner_options, self.dummy_logger, streets_files)

    self.dist_holder = learn_distributions.make_distribution_holder(
        learner_options, self.dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)

    self.worker = gw.Worker(
        self.options, self.dummy_logger, self.dist_holder)
def setUp(self):
    """Seed the RNG and learn the mock distributions.

    Sets ``seed``, ``seed_msg``, ``dummy_logger``, ``dummy_object`` and
    ``dist_holder`` on the test instance.
    """
    # Seed from the wall clock; seed_msg is presumably passed as the msg of
    # assertions so a failure shows which seed to replay. longMessage makes
    # unittest append such a msg to the default failure text.
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    class Object(object):
        pass

    self.dummy_logger = logging.getLogger('dummy')
    # setUp runs before every test and getLogger('dummy') always returns the
    # same logger, so attach a NullHandler only when none is present instead
    # of stacking one more handler per test.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in self.dummy_logger.handlers):
        self.dummy_logger.addHandler(logging.NullHandler())
    self.dummy_object = Object()

    # Build the distribution-holder. Every learner takes a list of
    # (label, file-like object) pairs.
    learner_options = Object()

    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(
        learner_options, self.dummy_logger, pums_files)

    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(
        learner_options, self.dummy_logger, names_files)

    zipcode_files = [
        ('mock_zipcodes', stringio.StringIO(mock_data_files.mock_zipcodes)),
    ]
    zipcode_dict = learn_distributions.learn_zipcode_dists(
        learner_options, self.dummy_logger, zipcode_files)

    text_files = [
        ('mock_text', stringio.StringIO(mock_data_files.mock_text_files)),
    ]
    text_engine = learn_distributions.train_text_engine(
        learner_options, self.dummy_logger, text_files)

    streets_files = [
        ('mock street file',
         stringio.StringIO(mock_data_files.mock_street_names)),
    ]
    address_dict = learn_distributions.learn_street_address_dists(
        learner_options, self.dummy_logger, streets_files)

    self.dist_holder = learn_distributions.make_distribution_holder(
        learner_options, self.dummy_logger, pums_dict, names_dict,
        zipcode_dict, address_dict, text_engine)