def create_bayes_net(state_id, puma_id, output_dir, households_data,
                     persons_data, configuration, person_segmenter,
                     household_segmenter):
    '''Train person and household bayes nets from pums data and save them.

    Each model is trained on segmented data built from the corresponding
    pums data frame and is written to output_dir before the next model
    is trained (person first, then household).

    Args:
        state_id: 2-digit state fips code
        puma_id: 5-digit puma code
        output_dir: dir to write out the generated bayesian nets to
        households_data: pums households data frame
        persons_data: pums persons data frame
        configuration: specifies the structure of the bayes net
        person_segmenter: function of inputs data to segment on a person
            variable
        household_segmenter: function of inputs data to segment on a
            household variable

    Returns:
        A (household_model, person_model) tuple of bayesian models
    '''
    def model_path(basename):
        # Every output file is named via FILE_PATTERN from the state/puma
        # pair and lives directly under output_dir.
        return os.path.join(
            output_dir, FILE_PATTERN.format(state_id, puma_id, basename)
        )

    # Persons: segment the data, train the net, then write it to disk.
    person_segments = SegmentedData.from_data(
        cleaned_data=persons_data,
        fields=list(configuration.person_fields),
        weight_field=inputs.PERSON_WEIGHT.name,
        segmenter=person_segmenter,
    )
    person_model = BayesianNetworkModel.train(
        input_data=person_segments,
        structure=configuration.person_structure,
        fields=configuration.person_fields,
    )
    person_model.write(model_path('person_model.json'))

    # Households: same pipeline with the household fields and weight.
    household_segments = SegmentedData.from_data(
        cleaned_data=households_data,
        fields=list(configuration.household_fields),
        weight_field=inputs.HOUSEHOLD_WEIGHT.name,
        segmenter=household_segmenter,
    )
    household_model = BayesianNetworkModel.train(
        input_data=household_segments,
        structure=configuration.household_structure,
        fields=configuration.household_fields,
    )
    household_model.write(model_path('household_model.json'))

    return household_model, person_model
def test_generate_with_prior(self):
    '''Generate from a net trained with prior data matching its only row.

    The training data has a single segment, 'one_bucket', holding one
    observation, and prior_data contains that same tuple, so the first
    generated person is expected to equal it.
    '''
    network = BayesianNetworkModel.train(
        bayesnets.SegmentedData({'one_bucket': [('35-64', 'F', '40k+')]}),
        self._person_structure(),
        self._person_fields(),
        prior_data={('35-64', 'F', '40k+')})
    # NOTE(review): the empty tuple is presumably "no evidence" -- confirm
    # against BayesianNetworkModel.generate.
    person = network.generate('one_bucket', ())[0]
    # Use assertEqual: the assertEquals alias is deprecated and was removed
    # from unittest.TestCase in Python 3.12.
    self.assertEqual(person, ('35-64', 'F', '40k+'))