def setUp(self):
    """Build the 'full' and 'short' XML query generators used by the tests.

    Learns PUMS and name distributions from the in-memory mock data files,
    wraps them in a DistributionHolder, builds an XmlGenerator on top of
    that holder, and finally creates two XmlQueryGenerator instances whose
    extra columns differ only in the ``path_type`` value ('full' vs
    'short').
    """
    # Seed from the clock and keep the seed in a message string
    # (presumably passed to assertions so a failing run can be reproduced).
    self.seed = int(time.time())
    self.seed_msg = "Random seed used for this test: %s" % self.seed
    self.longMessage = True
    spar_random.seed(self.seed)

    # Set up initialization values.
    self.dummy_logger = logging.getLogger('dummy')
    # logging.getLogger returns the same logger object for the same name, so
    # adding a handler unconditionally would stack up one more NullHandler
    # every time setUp runs. Attach one only if none is present yet.
    if not any(isinstance(handler, logging.NullHandler)
               for handler in self.dummy_logger.handlers):
        self.dummy_logger.addHandler(logging.NullHandler())

    class Options(object):
        pass

    learner_options = Options()
    learner_options.verbose = False

    # Learn the demographic distributions from the mock PUMS data.
    pums_files = [
        ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
    ]
    pums_dict = learn_distributions.learn_pums_dists(learner_options,
                                                     self.dummy_logger,
                                                     pums_files)

    # Learn the name distributions from the mock name lists.
    names_files = [
        ('male_first_names.txt',
         stringio.StringIO(mock_data_files.mock_male_first_names)),
        ('female_first_names.txt',
         stringio.StringIO(mock_data_files.mock_female_first_names)),
        ('last_names.txt',
         stringio.StringIO(mock_data_files.mock_last_names)),
    ]
    names_dict = learn_distributions.learn_name_dists(learner_options,
                                                      self.dummy_logger,
                                                      names_files)

    # The same list serves as the generation order and as the source of the
    # variable names. (Named `variables` to avoid shadowing builtin `vars`.)
    variables = [sv.VARS.SEX,
                 sv.VARS.CITIZENSHIP,
                 sv.VARS.AGE,
                 sv.VARS.RACE,
                 sv.VARS.STATE,
                 sv.VARS.FIRST_NAME,
                 sv.VARS.LAST_NAME]
    var_names = [sv.VARS.to_string(x) for x in variables]

    # Name distributions are merged last, so they win on any key clash.
    dist_dict = {}
    dist_dict.update(pums_dict)
    dist_dict.update(names_dict)

    dist_holder = dh.DistributionHolder(variables, var_names, dist_dict)

    fields = [sv.sql_name_to_enum('xml')]

    self._dist1 = xml_generator.XmlGenerator(dist_holder)
    dists = [self._dist1]

    # One row of extra column values per generator; only the last entry
    # (path_type) differs between the two.
    other_fields = ['no_queries', 'r_lower', 'r_upper', 'path_type']
    other_cols_full = [[5, 1, 10, 'full']]
    other_cols_short = [[5, 1, 10, 'short']]
    self.full_generator = xqg.XmlQueryGenerator(
        'P11', '', ["LL"], dists, fields, 1000, 100,
        other_fields, other_cols_full)
    self.short_generator = xqg.XmlQueryGenerator(
        'P11', '', ["LL"], dists, fields, 1000, 100,
        other_fields, other_cols_short)
def make_distribution_holder(options, logger, pums_dict, names_dict,
                             zipcode_dict, address_dict, text_engine):
    """
    Combine the learned demographic distributions with the 'fixed'
    (not-learned) ones and wrap the result in a DistributionHolder.

    The learned dictionaries are merged in the order pums, names, zipcode,
    address, followed by the text distributions built from `text_engine`;
    later entries replace earlier ones on a key clash. The bespoke
    distributions (SSN, fingerprint, DOB, last-updated, foo, XML) are then
    added on top. `options` and `logger` are accepted but not used here.
    """
    # The holder is told to generate variables in this order.
    # Note: Age and fingerprint are special cases, not included here.
    generation_order = sv.VAR_GENERATION_ORDER
    labels = [sv.VARS.to_string(var) for var in generation_order]

    distributions = {}
    for learned in (pums_dict, names_dict, zipcode_dict, address_dict):
        distributions.update(learned)
    distributions.update(make_text_distributions(text_engine))

    distributions[sv.VARS.SSN] = bespoke_distributions.SSNDistribution()

    # Fingerprints were at one point generated in Python. That is reportedly
    # not done at the moment, but the distribution stays registered in case
    # it is needed again.
    distributions[sv.VARS.FINGERPRINT] = \
        bespoke_distributions.FingerprintDistribution()

    # DOB is built from the age distribution, and last-updated from DOB,
    # so these two must be created in this order.
    age_dist = distributions[sv.VARS.AGE]
    dob_dist = bespoke_distributions.DOBDistribution(age_dist)
    distributions[sv.VARS.DOB] = dob_dist
    distributions[sv.VARS.LAST_UPDATED] = \
        bespoke_distributions.LastUpdatedDistribution(dob_dist)

    distributions[sv.VARS.FOO] = bespoke_distributions.FooDistribution()

    distributions[sv.VARS.XML] = make_xml_distribution(pums_dict, names_dict)

    return distribution_holder.DistributionHolder(generation_order,
                                                  labels, distributions)
# Example #3
# 0
    def setUp(self):
        """Build `self.dist_holder` from the in-memory mock data files.

        Learns the PUMS and name distributions from the mock data, merges
        them into one dictionary and wraps it in a DistributionHolder
        ordered over sex, citizenship, age, race, state, first name and
        last name.
        """
        # Seed from the clock and keep the seed in a message string
        # (presumably passed to assertions so a failing run can be
        # reproduced).
        self.seed = int(time.time())
        self.seed_msg = "Random seed used for this test: %s" % self.seed
        self.longMessage = True
        spar_random.seed(self.seed)

        self.dummy_logger = logging.getLogger('dummy')
        # logging.getLogger returns the same logger object for the same
        # name, so adding a handler unconditionally would stack up one more
        # NullHandler every time setUp runs. Attach one only if none is
        # present yet.
        if not any(isinstance(handler, logging.NullHandler)
                   for handler in self.dummy_logger.handlers):
            self.dummy_logger.addHandler(logging.NullHandler())

        learner_options = Options()
        learner_options.verbose = False

        # Learn the demographic distributions from the mock PUMS data.
        pums_files = [
            ("mock pums", stringio.StringIO(mock_data_files.mock_pums_data)),
        ]
        pums_dict = learn_distributions.learn_pums_dists(learner_options,
                                                         self.dummy_logger,
                                                         pums_files)

        # Learn the name distributions from the mock name lists.
        names_files = [
            ('male_first_names.txt',
             stringio.StringIO(mock_data_files.mock_male_first_names)),
            ('female_first_names.txt',
             stringio.StringIO(mock_data_files.mock_female_first_names)),
            ('last_names.txt',
             stringio.StringIO(mock_data_files.mock_last_names)),
        ]
        names_dict = learn_distributions.learn_name_dists(learner_options,
                                                          self.dummy_logger,
                                                          names_files)

        # The same list serves as the generation order and as the source of
        # the variable names. (Named `variables` to avoid shadowing the
        # builtin `vars`.)
        variables = [
            sv.VARS.SEX, sv.VARS.CITIZENSHIP, sv.VARS.AGE, sv.VARS.RACE,
            sv.VARS.STATE, sv.VARS.FIRST_NAME, sv.VARS.LAST_NAME
        ]
        var_names = [sv.VARS.to_string(x) for x in variables]

        # Name distributions are merged last, so they win on any key clash.
        dist_dict = {}
        dist_dict.update(pums_dict)
        dist_dict.update(names_dict)

        self.dist_holder = dh.DistributionHolder(variables, var_names,
                                                 dist_dict)
def make_xml_distribution(pums_dict, names_dict):
    """
    Build the XmlGenerator from the learned PUMS and name distributions.

    The XML generator works off a small DistributionHolder of its own, so
    one is assembled here over just the variables listed below. Entries in
    `names_dict` replace entries in `pums_dict` on a key clash.
    """
    xml_vars = [
        sv.VARS.SEX, sv.VARS.CITIZENSHIP, sv.VARS.AGE, sv.VARS.RACE,
        sv.VARS.STATE, sv.VARS.FIRST_NAME, sv.VARS.LAST_NAME
    ]
    labels = [sv.VARS.to_string(var) for var in xml_vars]

    learned = dict(pums_dict)
    learned.update(names_dict)

    # The same list is used both as the generation order and to derive the
    # names.
    inner_holder = distribution_holder.DistributionHolder(
        xml_vars, labels, learned)
    return xml_generator.XmlGenerator(inner_holder)
    def setUp(self):
        """Prepare a one-variable distribution holder and in-memory query
        schema text for the tests.

        Sets `self._holder`, `self._file_name` and `self._db_size`.
        NOTE(review): this fixture may continue past the lines visible
        here; only the statements below are documented.
        """

        # Seed from the clock and keep the seed in a message string
        # (presumably passed to assertions so a failing run can be
        # reproduced).
        self.seed = int(time.time())
        self.seed_msg = "Random seed used for this test: %s" % self.seed
        self.longMessage = True
        spar_random.seed(self.seed)
        self.dummy_logger = logging.getLogger('dummy')

        # Distribution holder with a single variable (FIRST_NAME, exposed
        # under the name 'fname') backed by a two-value distribution.
        # NOTE(review): the second argument to add() looks like a relative
        # weight ('hello' 1 vs 'there' 99) -- confirm against
        # CompactIndependentDistribution.
        dist1 = base_distribution.CompactIndependentDistribution()
        dist1.add('hello', 1)
        dist1.add('there', 99)
        vars = [VARS.FIRST_NAME]
        dists = [dist1]
        dist_dict = dict(zip(vars, dists))
        self._holder = dh.DistributionHolder(vars, ['fname'], dist_dict)

        # Despite its name, _file_name is an in-memory file object rather
        # than a path. It holds two lines of comma-separated text: a header
        # line and one 'EQ,eq,...' row.
        self._file_name = StringIO.StringIO('cat,sub_cat,perf,fields,\"[\'no_queries' \
                          '\',\'r_lower\',\'r_upper\']\"\nEQ,eq,"[\'LL\']","[\'fname\']","[10, 1, 100]\"')

        self._db_size = 1000