def test_can_sample_range(self):
    # Sampling a 'numrange' output from a probability distribution should
    # produce a range whose string form is '[2.01, 10]' for the
    # distribution built below.

    # Every outcome gets p=0.01, except 6 which carries the remaining mass.
    probabilities = dict.fromkeys(
        [0, 1, 2.006, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 0.01)
    probabilities[6] = 0.88

    dwelling = Dwelling(
        {'example_p': ProbabilityDistribution(probabilities)},
        self.mock_connection)
    dwelling.outputs = {
        'example': {
            'type': 'numrange',
            'sampling': True,
            'distribution': 'example_p',
        }
    }

    self.sampling_module.process(dwelling)

    # Should take 95% interval by default,
    # should round to 2 decimals (2.006 -> 2.01).
    sampled_range = dwelling.attributes['example']
    self.assertEqual(str(sampled_range), '[2.01, 10]')
def test_cavity_walls(self):
    # Checks the 'cavity wall' insulation distribution of a detached
    # ('vrijstaand') dwelling for three different construction years.

    def cavity_wall_dist_for(bouwjaar):
        # Build a dwelling with the given construction year and return
        # what the insulation module computes for its cavity wall.
        dwelling = Dwelling(
            {'bouwjaar': bouwjaar, 'woningtype': 'vrijstaand'},
            self.mock_connection)
        return self.insulation_module.process_insulation_type(
            dwelling, 'cavity wall')

    # with cavity wall
    dist_with_cavity_wall = cavity_wall_dist_for(1920)
    yearly_measures = [
        76784, 114914, 117197, 96150, 131324,
        132769, 159507, 159080, 214035, 281276,
    ]
    eligible_dwellings_n = 3273065
    multiplier = 1.98
    expected_p = multiplier * sum(yearly_measures) / eligible_dwellings_n
    self.assertEqual(
        dist_with_cavity_wall.p((1.25, 2.175)), expected_p)

    # without cavity wall
    self.assertEqual(cavity_wall_dist_for(1919).mean, 0)

    # with recent (probably already insulated) cavity wall
    self.assertEqual(cavity_wall_dist_for(2020).mean, 0)
def test_updating_dwelling_with_placeholder_processed_by_doesnt_override(self):
    # When a dwelling that already has its own processed_by entries is
    # added to the region, the placeholder's entries are expected to end
    # up after the dwelling's own entries instead of replacing them.
    self.placeholder_dwelling.processed_by = ['BaseModule']

    # dwelling with same vbo_id as placeholder
    new_dwelling = Dwelling({'vbo_id': self.vbo_id}, self.connection)
    new_dwelling.processed_by = ['OtherModule']

    self.region.add_dwelling(new_dwelling)

    self.assertEqual(
        new_dwelling.processed_by, ['OtherModule', 'BaseModule'])
def test_raises_when_adding_existing_dwelling(self):
    # Adding a second dwelling with the same vbo_id as one that is
    # already in the region must raise a ValueError.
    shared_attributes = {'vbo_id': '0363010000000002'}
    existing = Dwelling(shared_attributes, self.connection)
    # The duplicate gets its own copy of the attributes dict.
    duplicate = Dwelling(shared_attributes.copy(), self.connection)
    self.region.dwellings = [existing]

    # Attempt to add the same dwelling.
    with self.assertRaises(ValueError):
        self.region.add_dwelling(duplicate)
def test_can_sample_boolean(self):
    # A 'boolean' output sampled from a probability of 1 must be truthy.
    dwelling = Dwelling({'example_p': 1}, self.mock_connection)
    output_spec = {
        'type': 'boolean',
        'sampling': True,
        'distribution': 'example_p',
    }
    dwelling.outputs = {'example': output_spec}

    self.sampling_module.process(dwelling)

    self.assertTrue(dwelling.attributes['example'])
def test_facade_includes_cavity_wall_values(self):
    # For a dwelling with a cavity wall, the probability that the facade
    # still has its initial R-value must account for both facade measures
    # and cavity wall measures.
    multiplier = 1.98

    # FACADE
    # Applicable WoON distribution:
    # 2.11: 0.548,
    # ...
    # For dwellings built in or before 2000, all measure years
    # from 2010 to 2019 are applicable.
    # One (numerator, denominator) pair per measure year; presumably
    # (number of measures, number of dwellings).
    facade_ratios = [
        (35838, 6591218),
        (73097, 6669286),
        (76480, 6739330),
        (60548, 6804459),
        (84501, 6870704),
        (74448, 6943943),
        (75802, 7027060),
        (85442, 7109692),
        (100978, 7189902),
        (125197, 7261671),
    ]
    # Accumulate left to right so the float result matches a plain
    # chained addition of the ratios.
    facade_base = 0.0
    for numerator, denominator in facade_ratios:
        facade_base += numerator / denominator
    p_facade_measure = multiplier * facade_base

    # CAVITY WALL
    cavity_measures = [
        76784, 114914, 117197, 96150, 131324,
        132769, 159507, 159080, 214035, 281276,
    ]
    eligible_cavity_wall_dwellings_n = 3273065
    p_cavity_measure = (
        multiplier * sum(cavity_measures)
        / eligible_cavity_wall_dwellings_n)

    # has cavity wall
    dwelling = Dwelling(
        {'bouwjaar': 1920, 'woningtype': 'vrijstaand'},
        self.mock_connection)
    self.insulation_module.process(dwelling)

    # R-value is 2.11 if it was 2.11 to begin with (p=0.548)
    # and no measures were taken after that.
    facade_dist = dwelling.attributes['insulation_facade_r_dist']
    expected_p = 0.548 * (1 - p_facade_measure) * (1 - p_cavity_measure)
    self.assertAlmostEqual(facade_dist.p(2.11), expected_p)
def test_uses_woon_data_and_measures_for_buildings_before_2006(self):
    # For a dwelling without a cavity wall, the probability of still
    # having the initial R-value is the initial probability times the
    # probability that no measure was taken (checked for facade and roof).

    def summed_ratio(pairs):
        # Left-to-right accumulation, equivalent to a chained addition.
        total = 0.0
        for numerator, denominator in pairs:
            total += numerator / denominator
        return total

    multiplier = 1.98

    # no cavity wall
    dwelling = Dwelling(
        {'bouwjaar': 1919, 'woningtype': 'vrijstaand'},
        self.mock_connection)
    self.insulation_module.process(dwelling)

    # FACADE
    # Applicable WoON distribution:
    # 0.36: 0.018,
    # 0.43: 0.469,
    # ...
    # For dwellings built in or before 2000, all measure years
    # from 2010 to 2019 are applicable.
    # One (numerator, denominator) pair per measure year; presumably
    # (number of measures, number of dwellings).
    p_facade_measure = multiplier * summed_ratio([
        (35838, 6591218),
        (73097, 6669286),
        (76480, 6739330),
        (60548, 6804459),
        (84501, 6870704),
        (74448, 6943943),
        (75802, 7027060),
        (85442, 7109692),
        (100978, 7189902),
        (125197, 7261671),
    ])
    # R-value is 0.36 if it was 0.36 to begin with (p=0.018)
    # and no measures were taken after that (p = 1 - p_measure).
    facade_dist = dwelling.attributes['insulation_facade_r_dist']
    self.assertAlmostEqual(
        facade_dist.p(0.36), 0.018 * (1 - p_facade_measure), places=4)

    # ROOF
    p_roof_measure = multiplier * summed_ratio([
        (115398, 6591218),
        (157347, 6669286),
        (195420, 6739330),
        (124267, 6804459),
        (155099, 6870704),
        (148447, 6943943),
        (148100, 7027060),
        (164024, 7109692),
        (199784, 7189902),
        (246325, 7261671),
    ])
    roof_dist = dwelling.attributes['insulation_roof_r_dist']
    self.assertAlmostEqual(
        roof_dist.p(0.39), 0.025 * (1 - p_roof_measure), places=4)
def test_raises_when_adding_dwelling_outside_region(self):
    # Adding a dwelling whose vbo_id does not belong to the region
    # must raise a ValueError.

    # dwelling with vbo_id that is not inside that region
    outsider = Dwelling({'vbo_id': '0363010000000002'}, self.connection)

    self.assertRaises(ValueError, self.region.add_dwelling, outsider)
def test_can_replace_placeholders(self):
    # Adding a real dwelling with the placeholder's attributes should
    # leave the region with just that dwelling.

    # dwelling with same vbo_id as placeholder
    replacement = Dwelling(
        self.placeholder_attributes.copy(), self.connection)

    self.region.add_dwelling(replacement)

    # placeholder has been replaced
    self.assertEqual(self.region.dwellings, [replacement])
def test_saves_calculated_values(self):
    # Processing a second, identical dwelling must not call
    # process_insulation_type again (the result of the first run is
    # expected to be reused).
    from unittest import mock

    attributes = {
        'bouwjaar': 1920,
        'woningtype': 'vrijstaand'
    }
    dwelling1 = Dwelling(attributes, self.mock_connection)
    # identical dwelling
    dwelling2 = Dwelling(attributes, self.mock_connection)

    self.insulation_module.process(dwelling1)

    # We monkeypatch the method,
    # to see if it is called.
    # If it is, it will fail with a TypeError.
    # patch.object restores the real method when the block exits, so the
    # stub cannot leak into other tests that use the same module object.
    with mock.patch.object(
            self.insulation_module,
            'process_insulation_type',
            new=lambda x, y: None):
        try:
            self.insulation_module.process(dwelling2)
        except TypeError as e:
            self.fail(f'Should not rise TypeError "{e}"')
def test_updates_dwelling_with_placeholder_processed_by_when_adding(self):
    # A freshly created dwelling should take over the placeholder's
    # processed_by list when it is added to the region.
    self.placeholder_dwelling.processed_by = ['BaseModule']

    # dwelling with same vbo_id as placeholder
    new_dwelling = Dwelling({'vbo_id': self.vbo_id}, self.connection)
    self.region.add_dwelling(new_dwelling)

    self.assertEqual(new_dwelling.processed_by, ['BaseModule'])
def test_raises_when_adding_dwelling_with_conflicting_information(self):
    # If the placeholder and the new dwelling disagree on the value of
    # an attribute, adding the dwelling must raise a ValueError.
    self.placeholder_dwelling.attributes['foo'] = 'bar'

    # dwelling with same vbo_id as placeholder
    conflicting = Dwelling(
        {'vbo_id': self.vbo_id, 'foo': 'spam'}, self.connection)

    with self.assertRaises(ValueError):
        self.region.add_dwelling(conflicting)
def test_can_sample_distribution(self):
    # A 'double precision' output sampled from a distribution over
    # {1, 2} with equal probabilities should come out as 1.5.
    even_split = ProbabilityDistribution({1: 0.5, 2: 0.5})
    dwelling = Dwelling({'example_p': even_split}, self.mock_connection)
    dwelling.outputs = {
        'example': {
            'type': 'double precision',
            'sampling': True,
            'distribution': 'example_p',
        }
    }

    self.sampling_module.process(dwelling)

    # Should take mean by default
    sampled_value = dwelling.attributes['example']
    self.assertAlmostEqual(sampled_value, 1.5)
def test_updates_dwelling_with_placeholder_dwelling_values_when_adding(self):
    # Attributes that only the placeholder has should be available on
    # the new dwelling after it is added to the region.
    self.placeholder_dwelling.attributes['foo'] = 'bar'

    # dwelling with same vbo_id as placeholder
    new_dwelling = Dwelling({'vbo_id': self.vbo_id}, self.connection)
    self.region.add_dwelling(new_dwelling)

    self.assertEqual(new_dwelling.attributes['foo'], 'bar')
def test_retains_info_when_adding_dwelling_with_same_information(self):
    # When placeholder and dwelling agree on an attribute, the
    # dwelling's attributes should be unchanged after adding it.
    self.placeholder_dwelling.attributes['foo'] = 'bar'

    # dwelling with same vbo_id as placeholder
    # (the very same dict object is handed to the dwelling and compared
    # against afterwards)
    attributes = {'vbo_id': self.vbo_id, 'foo': 'bar'}
    matching = Dwelling(attributes, self.connection)

    self.region.add_dwelling(matching)

    self.assertEqual(matching.attributes, attributes)
def test_does_not_raise_when_adding_dwelling_with_same_information(self):
    # Adding a dwelling whose attributes agree with the placeholder's
    # must not raise a ValueError.
    self.placeholder_dwelling.attributes['foo'] = 'bar'

    # dwelling with same vbo_id as placeholder
    matching = Dwelling(
        {'vbo_id': self.vbo_id, 'foo': 'bar'}, self.connection)

    raised = False
    try:
        self.region.add_dwelling(matching)
    except ValueError:
        raised = True

    if raised:
        self.fail('Should not raise ValueError')
def test_uses_national_average_when_no_other_averages(self):
    # With neither the PC6 nor the buurt providing an EPI average, the
    # predicted EPI mean is expected to exceed 1.12 (per the test name,
    # because a national average is used instead).
    pc6 = PC6('1000AA', self.connection)
    # No average this time
    pc6.attributes['energy_label_epi_log_avg'] = None
    buurt = Buurt('BU0000000', self.connection)
    buurt.attributes['energy_label_epi_log_avg'] = None

    attributes = {
        'vbo_id': '0003010000000001',
        'bouwjaar': 2020,
        'woningtype': 'tussenwoning'
    }
    dwelling = Dwelling(attributes, self.connection)
    dwelling.regions['pc6'] = pc6
    dwelling.regions['buurt'] = buurt

    self.energy_label_prediction_module.process(dwelling)

    # Exact value doesn't matter, but that it is different
    # from previous results is.
    # assertGreater reports the actual value when the check fails,
    # unlike assertTrue on a comparison.
    self.assertGreater(dwelling.attributes['energy_label_epi_mean'], 1.12)
def test_uses_building_code_for_new_buildings(self):
    # For a dwelling built in 2020 every insulation distribution should
    # put all probability mass on a single R-value.
    dwelling = Dwelling(
        {'bouwjaar': 2020, 'woningtype': 'vrijstaand'},
        self.mock_connection)
    self.insulation_module.process(dwelling)

    certain_r_values = (
        ('insulation_facade_r_dist', 4.5),
        ('insulation_roof_r_dist', 6),
        ('insulation_floor_r_dist', 3.5),
        ('insulation_window_r_dist', 1/1.65),
    )
    for attribute_name, r_value in certain_r_values:
        distribution = dwelling.attributes[attribute_name]
        self.assertEqual(distribution.p(r_value), 1)
def test_predicts_epi(self):
    # Checks the predicted EPI mean, its 95% interval and the derived
    # energy label class for a dwelling whose PC6 has an EPI average.
    dwelling = Dwelling(
        {
            'vbo_id': '0003010000000001',
            'bouwjaar': 2020,
            'woningtype': 'tussenwoning',
        },
        self.connection)

    pc6 = PC6('1000AA', self.connection)
    pc6.attributes['energy_label_epi_log_avg'] = 0.1823215568
    dwelling.regions['pc6'] = pc6

    self.energy_label_prediction_module.process(dwelling)

    predicted = dwelling.attributes
    self.assertAlmostEqual(
        predicted['energy_label_epi_mean'], 1.1123215698958466, places=3)

    interval = predicted['energy_label_epi_95']
    self.assertAlmostEqual(interval.lower, 0.7336717114532558)
    self.assertAlmostEqual(interval.upper, 1.6873401901864706)

    self.assertEqual(predicted['energy_label_class_mean'], 'B')
    # A bit of an indirect way to check that this is the
    # EnergyLabelClassRange from D to A.
    self.energy_label_class_range_mock.assert_called_with(
        'D', 'A', bounds='[]')
def test_uses_average_of_buurt_when_no_labels_in_pc6(self):
    # When the PC6 has no EPI average but the buurt does, the predicted
    # EPI mean should match the value obtained with that same average.
    pc6 = PC6('1000AA', self.connection)
    # No average this time
    pc6.attributes['energy_label_epi_log_avg'] = None

    buurt = Buurt('BU0000000', self.connection)
    buurt.attributes['energy_label_epi_log_avg'] = 0.1823215568

    dwelling = Dwelling(
        {
            'vbo_id': '0003010000000001',
            'bouwjaar': 2020,
            'woningtype': 'tussenwoning',
        },
        self.connection)
    dwelling.regions.update({'pc6': pc6, 'buurt': buurt})

    # NOTE(review): the module is run twice on the same dwelling; it is
    # not clear from this test whether that is deliberate. Confirm, or
    # drop the second call.
    for _ in range(2):
        self.energy_label_prediction_module.process(dwelling)

    # Should be the same as previous calculation.
    self.assertAlmostEqual(
        dwelling.attributes['energy_label_epi_mean'],
        1.1123215698958466,
        places=3)
def test_uses_building_code_and_measures_for_buildings_after_2006_windows(self):
    # For a dwelling built in 2008, the window R-value distribution is
    # the double glazing base value combined with the measures of the
    # two applicable years (2018 and 2019).
    dwelling = Dwelling(
        {'bouwjaar': 2008, 'woningtype': 'vrijstaand'},
        self.mock_connection)
    self.insulation_module.process(dwelling)
    window_dist = dwelling.attributes['insulation_window_r_dist']

    # WINDOWS
    # base value: not from the building code,
    # but for double glazing: 0.333
    base_r_value = 0.333
    p_measure_2018 = 1.98 * 312337 / 7189902
    p_measure_2019 = 1.98 * 376869 / 7261671
    p_measure = p_measure_2018 + p_measure_2019

    expected_mean = (
        (1 - p_measure) * base_r_value
        + p_measure * (0.5 + 0.625) / 2)

    self.assertAlmostEqual(window_dist.mean, expected_mean)
    self.assertAlmostEqual(window_dist.p(0.333), 1 - p_measure)
def test_uses_building_code_and_measures_for_buildings_after_2006_facade(self):
    # From 2006 and onwards,
    # we don't have the WoON data anymore,
    # so we use the building code.
    # Easy example with just two applicable
    # measure years (2018 and 2019).
    dwelling = Dwelling(
        {'bouwjaar': 2008, 'woningtype': 'vrijstaand'},
        self.mock_connection)
    self.insulation_module.process(dwelling)
    facade_dist = dwelling.attributes['insulation_facade_r_dist']

    # FACADE
    # Applicable building code:
    #   2003: 2.5
    base_r_value = 2.5

    # Measures R-value:
    #   2018:
    #     2.9: 11.4 / (11.4 + 14.5)
    #     3.4: 14.5 / (11.4 + 14.5)
    #   2019:
    #     3.3: 10.2 / (10.2 + 11.8)
    #     3.8: 11.8 / (10.2 + 11.8)
    p_r_29 = 11.4 / (11.4 + 14.5)
    p_r_34 = 14.5 / (11.4 + 14.5)
    p_r_33 = 10.2 / (10.2 + 11.8)
    p_r_38 = 11.8 / (10.2 + 11.8)

    # Measures prob:
    #   multiplier: 1.98
    #   2018 measures: 100978
    #   2019 measures: 125197
    #   dwellings until 2008: 7189902
    #   dwellings until 2009: 7261671
    p_measure_2018 = 1.98 * 100978 / 7189902  # ~ 2.8%
    p_measure_2019 = 1.98 * 125197 / 7261671  # ~ 3.4%
    p_measure = p_measure_2018 + p_measure_2019

    expected_mean = (
        base_r_value
        + p_measure_2018 * (2.9 * p_r_29 + 3.4 * p_r_34)
        + p_measure_2019 * (3.3 * p_r_33 + 3.8 * p_r_38))

    self.assertAlmostEqual(facade_dist.mean, expected_mean)
    self.assertAlmostEqual(facade_dist.p(2.5), 1 - p_measure)
def test_uses_building_code_and_measures_for_buildings_after_2006_roof(self):
    # For a dwelling built in 2008, the roof R-value distribution is the
    # base value of 2.5 combined with the roof measures of the two
    # applicable years (2018 and 2019).
    dwelling = Dwelling(
        {'bouwjaar': 2008, 'woningtype': 'vrijstaand'},
        self.mock_connection)
    self.insulation_module.process(dwelling)
    roof_dist = dwelling.attributes['insulation_roof_r_dist']

    # Probabilities of the R-values associated with a measure,
    # per measure year (first pair: 2018, second pair: 2019).
    p_r_29 = 11.4 / (11.4 + 14.5)
    p_r_34 = 14.5 / (11.4 + 14.5)
    p_r_33 = 10.2 / (10.2 + 11.8)
    p_r_38 = 11.8 / (10.2 + 11.8)

    p_measure_2018 = 1.98 * 199784 / 7189902
    p_measure_2019 = 1.98 * 246325 / 7261671
    p_measure = p_measure_2018 + p_measure_2019

    expected_mean = (
        2.5
        + p_measure_2018 * (2.9 * p_r_29 + 3.4 * p_r_34)
        + p_measure_2019 * (3.3 * p_r_33 + 3.8 * p_r_38))

    self.assertAlmostEqual(roof_dist.mean, expected_mean)
    self.assertAlmostEqual(roof_dist.p(2.5), 1 - p_measure)
def pipeline(query, connection, fresh=False, N=None):
    """Run all modules on the dwellings returned by ``query`` and save them.

    Creates the 'results' table, initiates the modules, then streams the
    query result through a server-side cursor. Every row becomes a
    ``Dwelling`` that is processed by each module in turn and saved.
    The connection is committed and closed at the end.

    Args:
        query: SQL executed on the server-side cursor. Each row must
            unpack to (vbo_id, pc6, oppervlakte, bouwjaar, woningtype,
            buurt_id).
        connection: Database connection; committed and closed by this
            function once processing has finished.
        fresh: Passed to ``create_results_table``. Set to True to delete
            previous results.
        N: Maximum number of dwellings to process. None processes the
            full result of the query; 0 processes nothing.
    """
    start_time = time.time()

    print(f'fresh: {fresh} (if True, previous results will be deleted)')

    # Also deletes existing `results' table
    print("\nCreating table 'results'...")
    create_results_table(fresh)
    print("Adding primary key on vbo_id...")
    make_primary_key('results', 'vbo_id')

    print("\nInitiating modules...")
    regional_modules = get_regional_modules(connection)
    modules = get_modules(connection, regional_modules)

    print("\nGetting dwellings...")
    # We create a named server-side cursor:
    # https://www.psycopg.org/docs/usage.html#server-side-cursors
    # This keeps the memory usage down
    # since it will only fetch about 2000 (see cursor.itersize)
    # rows at a time into the Python memory.
    # You don't need to close() this cursor afterwards (in fact
    # the cursor disappears after a commit).
    cursor = connection.cursor(name='pipeline-cursor')
    cursor.execute(query)

    bag_count = 7892928
    results_count_estimate = get_rowcount_estimate('results', connection)
    print('Batch statistics:')
    print(f'   BAG entries: {bag_count}')
    print(
        f'   estimate of current number of results (might be outdated): {results_count_estimate} ({results_count_estimate/bag_count*100:.2f}%)'
    )
    print(f'   this batch: {"no number specified" if N is None else N}')

    print('\nStarting processing...')
    i = 0
    # The limit is checked after each processed dwelling, so a limit of 0
    # would never be hit inside the loop; skip the rows altogether then.
    # Negative values are not validated and, as before, never hit the limit.
    rows = cursor if N != 0 else ()
    for (vbo_id, pc6, oppervlakte, bouwjaar, woningtype, buurt_id) in rows:
        attributes = {
            'vbo_id': vbo_id,
            'pc6': pc6,
            'oppervlakte': oppervlakte,
            'bouwjaar': bouwjaar,
            'woningtype': woningtype,
            'buurt_id': buurt_id
        }
        dwelling = Dwelling(attributes, connection)

        for module in modules:
            module.process(dwelling)

        dwelling.save()

        i += 1
        if i % 100 == 0:
            # '\r' keeps the progress counter on a single console line.
            print(f'   processed dwelling: {i}', end='\r')
        if i == N:
            break

    print("\n\nCommiting and closing...")
    connection.commit()
    connection.close()

    print(
        f'Processed {i:,} records in {(time.time() - start_time):.2f} seconds.'
    )
def test_gets_both_energy_label_class_and_epi_imputed(self):
    # After processing, the dwelling with this vbo_id should have both
    # an energy label class and an EPI value.
    dwelling = Dwelling({'vbo_id': '0003010000000001'}, self.connection)

    self.energy_label_module.process(dwelling)

    expected_attributes = (
        ('energy_label_class', 'A'),
        ('energy_label_epi', 0.7),
    )
    for attribute_name, expected_value in expected_attributes:
        self.assertEqual(
            dwelling.attributes[attribute_name], expected_value)
def test_sets_to_none_when_no_label(self):
    # For the dwelling with this vbo_id, both energy label attributes
    # must be set to None after processing.
    dwelling = Dwelling({'vbo_id': '0003010000000002'}, self.connection)

    self.energy_label_module.process(dwelling)

    # assertIsNone checks identity with None and gives a clearer
    # failure message than comparing for equality.
    self.assertIsNone(dwelling.attributes['energy_label_class'])
    self.assertIsNone(dwelling.attributes['energy_label_epi'])