def test_save_snapshot_match(self):
    """Merging two snapshots yields a new child that prefers the first parent."""
    snapshot_manager = seed_models.BuildingSnapshot.objects
    self.assertEqual(snapshot_manager.all().count(), 2)

    # Attach a canonical building to bs2 so we can check what the merge
    # does to it afterwards.
    duplicate_canon = seed_models.CanonicalBuilding.objects.create(
        canonical_snapshot=self.bs2
    )
    self.bs2.canonical_building = duplicate_canon
    self.bs2.save()

    seed_models.save_snapshot_match(
        self.bs1.pk,
        self.bs2.pk,
        confidence=0.9,
        user=self.fake_user,
    )

    # The merge must have added one snapshot on top of the two parents.
    self.assertEqual(snapshot_manager.all().count(), 3)
    merged = snapshot_manager.all()[0]

    # Field values (and their recorded source) come from the first
    # snapshot handed to save_snapshot_match.
    self.assertEqual(merged.property_name, self.bs1.property_name)
    self.assertEqual(merged.property_name_source, self.bs1)

    # The meter relationship has to be carried over to the merged child.
    meter_pks = [meter.pk for meter in merged.meters.all()]
    self.assertEqual(meter_pks, [self.meter.pk])

    # Confidence and both parent links are recorded on the child.
    self.assertEqual(merged.confidence, 0.9)
    parent_pks = sorted(parent.pk for parent in merged.parents.all())
    self.assertEqual(parent_pks, sorted([self.bs1.pk, self.bs2.pk]))

    # The "duplicate" CanonicalBuilding of bs2 must now be inactive.
    reloaded_bs2 = snapshot_manager.get(pk=self.bs2.pk)
    self.assertFalse(reloaded_bs2.canonical_building.active)
def _check_save_snapshot_match_with_default(self, default_pk):
    """Check a snapshot merge in which ``default_pk`` names the preferred parent.

    :param default_pk: PK of the parent whose values should win; compared
        against ``self.bs1.pk``, anything else is treated as ``self.bs2``.
    """
    snapshot_manager = seed_models.BuildingSnapshot.objects
    self.assertEqual(snapshot_manager.all().count(), 2)

    # Attach a canonical building to bs2 so we can check what the merge
    # does to it afterwards.
    duplicate_canon = seed_models.CanonicalBuilding.objects.create(
        canonical_snapshot=self.bs2
    )
    self.bs2.canonical_building = duplicate_canon
    self.bs2.save()

    if default_pk == self.bs1.pk:
        preferred_parent = self.bs1
    else:
        preferred_parent = self.bs2

    seed_models.save_snapshot_match(
        self.bs1.pk,
        self.bs2.pk,
        confidence=0.9,
        user=self.fake_user,
        default_pk=default_pk,
    )

    # The merge must have added one snapshot on top of the two parents.
    self.assertEqual(snapshot_manager.all().count(), 3)
    merged = snapshot_manager.all()[0]

    # Field values (and their recorded source) come from the parent
    # that was named as the default.
    self.assertEqual(merged.property_name, preferred_parent.property_name)
    self.assertEqual(merged.property_name_source, preferred_parent)

    # The meter relationship has to be carried over to the merged child.
    meter_pks = [meter.pk for meter in merged.meters.all()]
    self.assertEqual(meter_pks, [self.meter.pk])

    # Confidence and both parent links are recorded on the child.
    self.assertEqual(merged.confidence, 0.9)
    parent_pks = sorted(parent.pk for parent in merged.parents.all())
    self.assertEqual(parent_pks, sorted([self.bs1.pk, self.bs2.pk]))

    # The "duplicate" CanonicalBuilding of bs2 must now be inactive.
    reloaded_bs2 = snapshot_manager.get(pk=self.bs2.pk)
    self.assertFalse(reloaded_bs2.canonical_building.active)
def handle_results(results, b_idx, can_rev_idx, unmatched_list, user_pk):
    """Save the closest match for one unmatched building and audit it.

    Only the first entry of ``results`` is considered. A confidence below
    ``settings.MATCH_MED_THRESHOLD`` (0.7 when the setting is absent) is
    stored as ``POSSIBLE_MATCH``; anything else as ``SYSTEM_MATCH``. The
    unmatched building is passed as ``default_pk`` for the merge.

    :param results: list of tuples. [('match', 0.99999),...]
    :param b_idx: int, the index of the current building in
        ``unmatched_list``.
    :param can_rev_idx: dict, reverse index from match -> canonical PK.
    :param unmatched_list: rows from a values_list query for unmatched
        BSes; each row is indexed positionally with the PK first.
    :param user_pk: user ID, used for AuditLog logging.
    """
    # The closest match is always the first result.
    best_match, confidence = results[0]

    # Everything that gets here passed the minimum threshold; this only
    # separates probable matches from good ones.
    medium_threshold = getattr(settings, 'MATCH_MED_THRESHOLD', 0.7)
    match_type = (
        POSSIBLE_MATCH if confidence < medium_threshold else SYSTEM_MATCH
    )

    canonical_pk = can_rev_idx[best_match]
    unmatched_pk = unmatched_list[b_idx][0]

    merged = save_snapshot_match(
        canonical_pk,
        unmatched_pk,
        confidence=confidence,
        match_type=match_type,
        default_pk=unmatched_pk,
    )

    AuditLog.objects.create(
        user_id=user_pk,
        content_object=merged.canonical_building,
        action_note='System matched building.',
        action='save_system_match',
        organization=merged.super_organization,
    )
def handle_id_matches(unmatched_bs, import_file, user_pk):
    """Merge ``unmatched_bs`` with every canonical snapshot sharing an ID.

    Candidates come from ``get_canonical_id_matches``, queried with the
    snapshot's organization ID, PM property ID, tax lot ID and custom ID.
    Each candidate is merged in turn; the child of one merge becomes the
    input of the next. After every merge the child's canonical building is
    pointed at the child and an AuditLog entry is written.

    :param unmatched_bs: the snapshot to merge.
    :param import_file: its ``import_record.owner`` is passed as the user
        performing the match.
    :param user_pk: user ID, used for AuditLog logging.
    :returns: the child of the last merge, or ``None`` when there is no
        candidate.
    """
    id_matches = get_canonical_id_matches(
        unmatched_bs.super_organization_id,
        unmatched_bs.pm_property_id,
        unmatched_bs.tax_lot_id,
        unmatched_bs.custom_id_1,
    )
    if not id_matches.exists():
        return None

    # Saved as a system match with high confidence.
    latest_child = unmatched_bs
    for canonical_snapshot in id_matches:
        # Chain the merges: each new child replaces the "unmatched"
        # snapshot for the following candidate.
        latest_child = save_snapshot_match(
            canonical_snapshot.pk,
            latest_child.pk,
            confidence=0.9,  # TODO(gavin) represent conf better.
            match_type=SYSTEM_MATCH,
            user=import_file.import_record.owner,
        )

        canonical_building = latest_child.canonical_building
        canonical_building.canonical_snapshot = latest_child
        canonical_building.save()

        AuditLog.objects.create(
            user_id=user_pk,
            content_object=canonical_building,
            action_note='System matched building ID.',
            action='save_system_match',
            organization=latest_child.super_organization,
        )

    # The most recent child of all the merging.
    return latest_child
def handle_results(results, b_idx, can_rev_idx, unmatched_list, user_pk):
    """Save the closest match for one unmatched building and audit it.

    Only the first entry of ``results`` is considered. A confidence below
    ``settings.MATCH_MED_THRESHOLD`` (0.7 when the setting is absent) is
    stored as ``POSSIBLE_MATCH``; anything else as ``SYSTEM_MATCH``.

    :param results: list of tuples. [('match', 0.99999),...]
    :param b_idx: int, the index of the current building in
        ``unmatched_list``.
    :param can_rev_idx: dict, reverse index from match -> canonical PK.
    :param unmatched_list: rows from a values_list query for unmatched
        BSes; each row is indexed positionally with the PK first.
    :param user_pk: user ID, used for AuditLog logging.
    """
    # The closest match is always the first result.
    best_match, confidence = results[0]

    # Everything that gets here passed the minimum threshold; this only
    # separates probable matches from good ones.
    medium_threshold = getattr(settings, 'MATCH_MED_THRESHOLD', 0.7)
    match_type = (
        POSSIBLE_MATCH if confidence < medium_threshold else SYSTEM_MATCH
    )

    canonical_pk = can_rev_idx[best_match]
    unmatched_pk = unmatched_list[b_idx][0]

    merged = save_snapshot_match(
        canonical_pk,
        unmatched_pk,
        confidence=confidence,
        match_type=match_type,
    )

    AuditLog.objects.create(
        user_id=user_pk,
        content_object=merged.canonical_building,
        action_note='System matched building.',
        action='save_system_match',
        organization=merged.super_organization,
    )
def save_match(request):
    """Create or remove a match for an imported building.

    JSON payload::

        body = {
            'source_building_id': 123,
            'target_building_id': 512,
            'create_match': True
        }

    When ``create_match`` is truthy the source and target are merged via
    ``save_snapshot_match``; otherwise the source is unmatched via
    ``unmatch_snapshot``.

    Called from services.js building_factory.save_match.

    :param request: request whose body is the JSON payload above.
    :returns: dict with ``status`` set to 'success' and ``child_id`` set to
        the PK of the merged snapshot, or ``None`` after an unmatch.
    """
    payload = json.loads(request.body)
    should_create = payload.get('create_match')
    source_pk = payload['source_building_id']
    target_pk = payload.get('target_building_id')

    if should_create:
        # NOTE(review): match_type=2 is presumably the user-match
        # constant -- confirm against the model definitions.
        child = save_snapshot_match(
            source_pk,
            target_pk,
            user=request.user,
            match_type=2,
        )
        child_pk = child.pk
    else:
        unmatch_snapshot(source_pk)
        child_pk = None

    return {'status': 'success', 'child_id': child_pk}
def test_get_building(self):
    """
    The get_building view returns the building detail together with
    source information from its parent buildings.
    """
    # arrange
    child = save_snapshot_match(self.parent_1.pk, self.parent_2.pk)
    endpoint = reverse_lazy("seed:get_building")
    query = {
        'building_id': child.pk,
        'organization_id': self.org.pk,
    }

    # act
    response = self.client.get(
        endpoint, query, content_type='application/json'
    )
    data = json.loads(response.content)

    # assert
    self.assertEqual(data['status'], 'success')
    self.assertEqual(len(data['imported_buildings']), 2)
    first_parent, second_parent = data['imported_buildings']

    # both parents have the same child
    self.assertEqual(first_parent['children'][0], child.pk)
    self.assertEqual(second_parent['children'][0], child.pk)

    # both parents link to their import file
    self.assertEqual(first_parent['import_file'], self.import_file_1.pk)
    self.assertEqual(second_parent['import_file'], self.import_file_2.pk)

    # child should get the first address
    building = data['building']
    self.assertEqual(
        building['address_line_1'], self.parent_1.address_line_1
    )
    self.assertEqual(building['address_line_1_source'], self.parent_1.pk)

    # child should get second gross floor area since first is set to None
    self.assertEqual(
        building['gross_floor_area_source'], self.parent_2.pk
    )
def handle_results(results, b_idx, can_rev_idx, unmatched_list):
    """Save the closest match for one unmatched building.

    Only the first entry of ``results`` is considered. A confidence below
    ``settings.MATCH_MED_THRESHOLD`` (0.7 when the setting is absent) is
    stored as ``POSSIBLE_MATCH``; anything else as ``SYSTEM_MATCH``.

    :param results: list of tuples. [('match', 0.99999),...]
    :param b_idx: int, the index of the current building in
        ``unmatched_list``.
    :param can_rev_idx: dict, reverse index from match -> canonical PK.
    :param unmatched_list: rows from a values_list query for unmatched
        BSes; each row is indexed positionally with the PK first.
    """
    # The closest match is always the first result.
    best_match, confidence = results[0]

    # Everything that gets here passed the minimum threshold; this only
    # separates probable matches from good ones.
    medium_threshold = getattr(settings, 'MATCH_MED_THRESHOLD', 0.7)
    match_type = (
        POSSIBLE_MATCH if confidence < medium_threshold else SYSTEM_MATCH
    )

    canonical_pk = can_rev_idx[best_match]
    unmatched_pk = unmatched_list[b_idx][0]

    save_snapshot_match(
        canonical_pk,
        unmatched_pk,
        confidence=confidence,
        match_type=match_type,
    )
def test_get_building_with_deleted_dataset(self):
    """
    The get_building view still loads a building after its dataset has
    been deleted, without listing sources from deleted import files.
    """
    # arrange
    child = save_snapshot_match(self.parent_1.pk, self.parent_2.pk)
    endpoint = reverse_lazy("seed:get_building")
    query = {
        'building_id': child.pk,
        'organization_id': self.org.pk,
    }

    # act
    self.import_record.delete()
    response = self.client.get(
        endpoint, query, content_type='application/json'
    )
    data = json.loads(response.content)

    # assert
    self.assertEqual(data['status'], 'success')

    # empty list of parents
    self.assertEqual(len(data['imported_buildings']), 0)

    # building should still have all its info
    building = data['building']
    self.assertEqual(
        building['address_line_1'], self.parent_1.address_line_1
    )
    self.assertEqual(building['address_line_1_source'], self.parent_1.pk)
    self.assertEqual(
        building['gross_floor_area_source'], self.parent_2.pk
    )
    self.assertAlmostEqual(
        building['gross_floor_area'],
        self.parent_2.gross_floor_area,
        places=1,
    )
def handle_results(results, b_idx, can_rev_idx, unmatched_list, user_pk):
    """Save the closest match for one unmatched building and audit it.

    Only the first entry of ``results`` is considered. A confidence below
    ``settings.MATCH_MED_THRESHOLD`` (0.7 when the setting is absent) is
    stored as ``POSSIBLE_MATCH``; anything else as ``SYSTEM_MATCH``. The
    unmatched building is passed as ``default_pk`` for the merge. Any field
    changes reported by ``save_snapshot_match`` are appended to the audit
    note, one line per field.

    :param results: list of tuples. [('match', 0.99999),...]
    :param b_idx: int, the index of the current building in
        ``unmatched_list``.
    :param can_rev_idx: dict, reverse index from match -> canonical PK.
    :param unmatched_list: rows from a values_list query for unmatched
        BSes; each row is indexed positionally with the PK first.
    :param user_pk: user ID, used for AuditLog logging.
    """
    # The closest match is always the first result.
    best_match, confidence = results[0]

    # Everything that gets here passed the minimum threshold; this only
    # separates probable matches from good ones.
    medium_threshold = getattr(settings, 'MATCH_MED_THRESHOLD', 0.7)
    match_type = (
        POSSIBLE_MATCH if confidence < medium_threshold else SYSTEM_MATCH
    )

    canonical_pk = can_rev_idx[best_match]
    unmatched_pk = unmatched_list[b_idx][0]

    merged, changes = save_snapshot_match(
        canonical_pk,
        unmatched_pk,
        confidence=confidence,
        match_type=match_type,
        default_pk=unmatched_pk,
    )

    action_note = 'System matched building.'
    if changes:
        pieces = [" Fields changed in cannonical building:\n"]
        for change in changes:
            label = change["field"].replace("_", " ").replace("-", "")
            pieces.append("\t{field}:\t".format(field=label.capitalize()))
            # The previous value is only reported when one was recorded.
            if "from" in change:
                pieces.append(
                    "From:\t{prev}\tTo:\t".format(prev=change["from"])
                )
            pieces.append("{value}\n".format(value=change["to"]))
        # Strip the newline that terminates the last piece.
        action_note = (action_note + "".join(pieces))[:-1]

    AuditLog.objects.create(
        user_id=user_pk,
        content_object=merged.canonical_building,
        action_note=action_note,
        action='save_system_match',
        organization=merged.super_organization,
    )
def handle_results(results, b_idx, can_rev_idx, unmatched_list, user_pk):
    """Save the closest match for one unmatched building and audit it.

    Only the first entry of ``results`` is considered. A confidence below
    ``settings.MATCH_MED_THRESHOLD`` (0.7 when the setting is absent) is
    stored as ``POSSIBLE_MATCH``; anything else as ``SYSTEM_MATCH``. The
    unmatched building is passed as ``default_pk`` for the merge. Any field
    changes reported by ``save_snapshot_match`` are appended to the audit
    note, one line per field.

    :param results: list of tuples. [('match', 0.99999),...]
    :param b_idx: int, the index of the current building in
        ``unmatched_list``.
    :param can_rev_idx: dict, reverse index from match -> canonical PK.
    :param unmatched_list: rows from a values_list query for unmatched
        BSes; each row is indexed positionally with the PK first.
    :param user_pk: user ID, used for AuditLog logging.
    """
    # The closest match is always the first result.
    top_match, confidence = results[0]

    # Everything that gets here passed the minimum threshold; this only
    # separates probable matches from good ones.
    if confidence < getattr(settings, 'MATCH_MED_THRESHOLD', 0.7):
        match_type = POSSIBLE_MATCH
    else:
        match_type = SYSTEM_MATCH

    canonical_pk = can_rev_idx[top_match]
    unmatched_pk = unmatched_list[b_idx][0]

    child, changes = save_snapshot_match(
        canonical_pk,
        unmatched_pk,
        confidence=confidence,
        match_type=match_type,
        default_pk=unmatched_pk,
    )

    action_note = 'System matched building.'
    if changes:
        fragments = [" Fields changed in cannonical building:\n"]
        for change in changes:
            field_label = (
                change["field"].replace("_", " ").replace("-", "").capitalize()
            )
            fragments.append("\t{field}:\t".format(field=field_label))
            # The previous value is only reported when one was recorded.
            if "from" in change:
                fragments.append(
                    "From:\t{prev}\tTo:\t".format(prev=change["from"])
                )
            fragments.append("{value}\n".format(value=change["to"]))
        # Strip the newline that terminates the last fragment.
        action_note = (action_note + "".join(fragments))[:-1]

    AuditLog.objects.create(
        user_id=user_pk,
        content_object=child.canonical_building,
        action_note=action_note,
        action='save_system_match',
        organization=child.super_organization,
    )
def test_unmatch_snapshot_tree_retains_canonical_snapshot(self):
    """
    After unmatching bs4 from the tree, the former tip is gone and bs4's
    canonical building is active with bs4 as its canonical snapshot.
    """
    self.bs3 = util.make_fake_snapshot(
        self.import_file1,
        self.bs1_data,
        bs_type=seed_models.COMPOSITE_BS,
        is_canon=True,
    )
    self.bs4 = util.make_fake_snapshot(
        self.import_file1,
        self.bs2_data,
        bs_type=seed_models.COMPOSITE_BS,
        is_canon=True,
    )

    # Merge bs2, bs3 and bs4, one after the other, with whatever is the
    # tip of bs1's tree at that moment.
    for snapshot in (self.bs2, self.bs3, self.bs4):
        seed_models.save_snapshot_match(snapshot.pk, self.bs1.tip.pk)

    tip_pk = self.bs1.tip.pk

    # unmatch bs4 from the rest of the tree
    seed_models.unmatch_snapshot_tree(self.bs4.pk)

    # tip should be deleted
    tip_lookup = seed_models.BuildingSnapshot.objects.filter(pk=tip_pk)
    self.assertFalse(tip_lookup.exists())

    canon_bs4 = seed_models.CanonicalBuilding.objects.get(
        pk=self.bs4.canonical_building_id
    )

    # bs4's canonical building should be active ...
    self.assertTrue(canon_bs4.active)

    # ... and should still have bs4 as its canonical_snapshot
    self.assertEqual(canon_bs4.canonical_snapshot, self.bs4)
def test_unmatch_snapshot_tree_retains_canonical_snapshot(self):
    """
    After unmatching bs4 from the tree, the former tip is gone and bs4's
    canonical building is active with bs4 as its canonical snapshot.
    """
    composite_kwargs = {
        'bs_type': seed_models.COMPOSITE_BS,
        'is_canon': True,
    }
    self.bs3 = util.make_fake_snapshot(
        self.import_file1, self.bs1_data, **composite_kwargs
    )
    self.bs4 = util.make_fake_snapshot(
        self.import_file1, self.bs2_data, **composite_kwargs
    )

    # Merge bs2, bs3 and bs4, one after the other, with whatever is the
    # tip of bs1's tree at that moment.
    merge = seed_models.save_snapshot_match
    merge(self.bs2.pk, self.bs1.tip.pk)
    merge(self.bs3.pk, self.bs1.tip.pk)
    merge(self.bs4.pk, self.bs1.tip.pk)

    tip_pk = self.bs1.tip.pk

    # unmatch bs4 from the rest of the tree
    seed_models.unmatch_snapshot_tree(self.bs4.pk)

    # tip should be deleted
    remaining_tip = seed_models.BuildingSnapshot.objects.filter(pk=tip_pk)
    self.assertFalse(remaining_tip.exists())

    bs4_canon = seed_models.CanonicalBuilding.objects.get(
        pk=self.bs4.canonical_building_id
    )

    # bs4's canonical building should be active ...
    self.assertTrue(bs4_canon.active)

    # ... and should still have bs4 as its canonical_snapshot
    self.assertEqual(bs4_canon.canonical_snapshot, self.bs4)
def handle_id_matches(unmatched_bs, import_file, user_pk):
    """Merge ``unmatched_bs`` with every canonical snapshot sharing an ID.

    Candidates come from ``get_canonical_id_matches``, queried with the
    snapshot's organization ID, PM property ID, tax lot ID and custom ID.
    Before anything is merged, every candidate and every snapshot in its
    ``parent_tree`` is compared with ``unmatched_bs`` via
    ``is_same_snapshot``. Candidates are then merged in turn; the child of
    one merge becomes the input (and the ``default_pk``) of the next. After
    every merge the child's canonical building is pointed at the child and
    an AuditLog entry listing the changed fields is written.

    :param unmatched_bs: the snapshot to merge.
    :param import_file: its ``import_record.owner`` is passed as the user
        performing the match.
    :param user_pk: user ID, used for AuditLog logging.
    :returns: the child of the last merge, or ``None`` when there is no
        candidate.
    :raises DuplicateDataError: with the PK of the first snapshot that
        ``is_same_snapshot`` reports as identical to ``unmatched_bs``.
    """
    id_matches = get_canonical_id_matches(
        unmatched_bs.super_organization_id,
        unmatched_bs.pm_property_id,
        unmatched_bs.tax_lot_id,
        unmatched_bs.custom_id_1,
    )
    if not id_matches.exists():
        return None

    # Duplicate detection happens before any merge is attempted.
    # If raising turns out to be too slow, an alternative is to
    # monkey-patch unmatched_bs (e.g. unmatched_bs.duplicate_of_pk) and
    # have the caller check for it.
    for candidate in id_matches:
        # Is this a duplicate of the canonical snapshot itself?
        if is_same_snapshot(unmatched_bs, candidate):
            raise DuplicateDataError(candidate.pk)

        # ... or of any of the parent records behind it?
        for ancestor in candidate.parent_tree:
            if is_same_snapshot(unmatched_bs, ancestor):
                raise DuplicateDataError(ancestor.pk)

    # Saved as a system match with high confidence.
    latest_child = unmatched_bs
    for candidate in id_matches:
        # Chain the merges: each new child replaces the "unmatched"
        # snapshot for the following candidate.
        latest_child, changes = save_snapshot_match(
            candidate.pk,
            latest_child.pk,
            confidence=0.9,  # TODO(gavin) represent conf better.
            match_type=SYSTEM_MATCH,
            user=import_file.import_record.owner,
            default_pk=latest_child.pk,
        )

        canonical_building = latest_child.canonical_building
        canonical_building.canonical_snapshot = latest_child
        canonical_building.save()

        action_note = 'System matched building ID.'
        if changes:
            pieces = [" Fields changed in cannonical building:\n"]
            for change in changes:
                label = change["field"].replace("_", " ").replace("-", "")
                pieces.append(
                    "\t{field}:\t".format(field=label.capitalize())
                )
                # The previous value is only reported when one was recorded.
                if "from" in change:
                    pieces.append(
                        "From:\t{prev}\tTo:\t".format(prev=change["from"])
                    )
                pieces.append("{value}\n".format(value=change["to"]))
            # Strip the newline that terminates the last piece.
            action_note = (action_note + "".join(pieces))[:-1]

        AuditLog.objects.create(
            user_id=user_pk,
            content_object=canonical_building,
            action_note=action_note,
            action='save_system_match',
            organization=latest_child.super_organization,
        )

    # The most recent child of all the merging.
    return latest_child
def handle_id_matches(unmatched_bs, import_file, user_pk):
    """Merge ``unmatched_bs`` with every canonical snapshot sharing an ID.

    Candidates come from ``get_canonical_id_matches``, queried with the
    snapshot's organization ID, PM property ID, tax lot ID and custom ID.
    Before anything is merged, every candidate and every snapshot in its
    ``parent_tree`` is compared with ``unmatched_bs`` via
    ``is_same_snapshot``. Candidates are then merged in turn; the child of
    one merge becomes the input (and the ``default_pk``) of the next. After
    every merge the child's canonical building is pointed at the child and
    an AuditLog entry listing the changed fields is written.

    :param unmatched_bs: the snapshot to merge.
    :param import_file: its ``import_record.owner`` is passed as the user
        performing the match.
    :param user_pk: user ID, used for AuditLog logging.
    :returns: the child of the last merge, or ``None`` when there is no
        candidate.
    :raises DuplicateDataError: with the PK of the first snapshot that
        ``is_same_snapshot`` reports as identical to ``unmatched_bs``.
    """
    id_matches = get_canonical_id_matches(
        unmatched_bs.super_organization_id,
        unmatched_bs.pm_property_id,
        unmatched_bs.tax_lot_id,
        unmatched_bs.custom_id_1,
    )
    if not id_matches.exists():
        return None

    # Duplicate detection happens before any merge is attempted.
    # If raising turns out to be too slow, an alternative is to
    # monkey-patch unmatched_bs (e.g. unmatched_bs.duplicate_of_pk) and
    # have the caller check for it.
    for canonical_match in id_matches:
        # Is this a duplicate of the canonical snapshot itself?
        if is_same_snapshot(unmatched_bs, canonical_match):
            raise DuplicateDataError(canonical_match.pk)

        # ... or of any of the parent records behind it?
        for parent_snapshot in canonical_match.parent_tree:
            if is_same_snapshot(unmatched_bs, parent_snapshot):
                raise DuplicateDataError(parent_snapshot.pk)

    # Saved as a system match with high confidence.
    current = unmatched_bs
    for canonical_match in id_matches:
        # Chain the merges: each new child replaces the "unmatched"
        # snapshot for the following candidate.
        current, changes = save_snapshot_match(
            canonical_match.pk,
            current.pk,
            confidence=0.9,  # TODO(gavin) represent conf better.
            match_type=SYSTEM_MATCH,
            user=import_file.import_record.owner,
            default_pk=current.pk,
        )

        canon = current.canonical_building
        canon.canonical_snapshot = current
        canon.save()

        action_note = 'System matched building ID.'
        if changes:
            fragments = [" Fields changed in cannonical building:\n"]
            for change in changes:
                field_label = (
                    change["field"]
                    .replace("_", " ")
                    .replace("-", "")
                    .capitalize()
                )
                fragments.append("\t{field}:\t".format(field=field_label))
                # The previous value is only reported when one was recorded.
                if "from" in change:
                    fragments.append(
                        "From:\t{prev}\tTo:\t".format(prev=change["from"])
                    )
                fragments.append("{value}\n".format(value=change["to"]))
            # Strip the newline that terminates the last fragment.
            action_note = (action_note + "".join(fragments))[:-1]

        AuditLog.objects.create(
            user_id=user_pk,
            content_object=canon,
            action_note=action_note,
            action='save_system_match',
            organization=current.super_organization,
        )

    # The most recent child of all the merging.
    return current