Example #1
0
def create_bayes_net(state_id, puma_id, output_dir, households_data, persons_data, configuration,
                     person_segmenter, household_segmenter):
    '''Train a person and a household bayes net from pums data and write both out.

    The person network is trained and written first, then the household
    network. Each output file lives in output_dir and is named by
    FILE_PATTERN from the state id, the puma id and a per-model suffix.

    Args:
        state_id: 2-digit state fips code
        puma_id: 5-digit puma code
        output_dir: dir to write out the generated bayesian nets to
        households_data: pums households data frame
        persons_data: pums persons data frame
        configuration: specifies the structure of the bayes net; supplies
            person_fields/person_structure and
            household_fields/household_structure
        person_segmenter: function of inputs data to segment on a person variable
        household_segmenter: function of inputs data to segment on a household variable
    Returns:
        tuple of (household_model, person_model)
    '''
    def _destination(suffix):
        # Full path of one model file for this state/puma pair.
        return os.path.join(output_dir, FILE_PATTERN.format(state_id, puma_id, suffix))

    # Persons: segment the training rows, fit the network, persist it.
    person_segments = SegmentedData.from_data(
        cleaned_data=persons_data,
        fields=list(configuration.person_fields),
        weight_field=inputs.PERSON_WEIGHT.name,
        segmenter=person_segmenter,
    )
    person_net = BayesianNetworkModel.train(
        input_data=person_segments,
        structure=configuration.person_structure,
        fields=configuration.person_fields,
    )
    person_path = _destination('person_model.json')
    person_net.write(person_path)

    # Households: same three steps with the household inputs.
    household_segments = SegmentedData.from_data(
        cleaned_data=households_data,
        fields=list(configuration.household_fields),
        weight_field=inputs.HOUSEHOLD_WEIGHT.name,
        segmenter=household_segmenter,
    )
    household_net = BayesianNetworkModel.train(
        input_data=household_segments,
        structure=configuration.household_structure,
        fields=configuration.household_fields,
    )
    household_path = _destination('household_model.json')
    household_net.write(household_path)

    # Callers receive the household model first, then the person model.
    return household_net, person_net
 def test_generate_with_prior(self):
     '''Generating from a one-observation bucket returns that observation.

     The network is trained on a single bucket ('one_bucket') holding one
     person tuple, with prior_data set to a set containing the same tuple.
     The first result of generate() for that bucket must equal the tuple.
     '''
     only_person = ('35-64', 'F', '40k+')
     network = BayesianNetworkModel.train(
         bayesnets.SegmentedData({'one_bucket': [only_person]}),
         self._person_structure(),
         self._person_fields(),
         prior_data={only_person})
     # NOTE(review): the empty tuple is presumably "no evidence" -- confirm
     # against BayesianNetworkModel.generate.
     person = network.generate('one_bucket', ())[0]
     # assertEqual, not the deprecated alias assertEquals (removed in
     # Python 3.12).
     self.assertEqual(person, only_person)