def test_new_record(self):
    """A row that exists only in TempCitizen is counted as new.

    After mirroring, a Citizen with the temp row's pk is expected to exist.
    """
    temp_citizen = TempCitizenFactory()

    result = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # (counter attribute on the stats object, expected value)
    expected_counts = (
        ('unchanged_count', 0),
        ('modified_record_count', 0),
        ('new_record_count', 1),
        ('not_there_anymore_count', 0),
    )
    for counter, expected in expected_counts:
        self.assertEqual(expected, getattr(result, counter), msg=counter)

    mirrored = Citizen.objects.filter(pk=temp_citizen.pk)
    self.assertTrue(mirrored.exists())
def test_unchanged_record(self):
    """A temp row built from a citizen's own field values counts as unchanged."""
    citizen = CitizenFactory()
    citizen_fields = model_to_dict(citizen)
    TempCitizenFactory(**citizen_fields)

    result = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # (counter attribute on the stats object, expected value)
    expected_counts = (
        ('unchanged_count', 1),
        ('modified_record_count', 0),
        ('new_record_count', 0),
        ('not_there_anymore_count', 0),
    )
    for counter, expected in expected_counts:
        self.assertEqual(expected, getattr(result, counter), msg=counter)
def test_empty_dbs(self):
    """Mirroring when this test has created no rows reports zero everywhere."""
    result = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # Every counter on the stats object is expected to be zero.
    counters = (
        'unchanged_count',
        'modified_record_count',
        'new_record_count',
        'not_there_anymore_count',
    )
    for counter in counters:
        self.assertEqual(0, getattr(result, counter), msg=counter)
def test_changed_record(self):
    """A temp row sharing a citizen's pk but with a new first_name is a modification.

    After mirroring, the Citizen row is expected to carry the temp row's
    first_name.
    """
    citizen = CitizenFactory()
    altered_name = citizen.first_name + '_CHANGED'
    temp_citizen = TempCitizenFactory(pk=citizen.pk, first_name=altered_name)

    result = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # (counter attribute on the stats object, expected value)
    expected_counts = (
        ('unchanged_count', 0),
        ('modified_record_count', 1),
        ('new_record_count', 0),
        ('not_there_anymore_count', 0),
    )
    for counter, expected in expected_counts:
        self.assertEqual(expected, getattr(result, counter), msg=counter)

    refreshed = Citizen.objects.get(pk=citizen.pk)
    self.assertEqual(temp_citizen.first_name, refreshed.first_name)
def test_new_record(self):
    """A TempCitizen with no Citizen counterpart is reported as a new record.

    The test also expects a Citizen with the same pk to exist afterwards.
    """
    incoming = TempCitizenFactory()

    outcome = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # Only the "new" counter should be non-zero.
    for name, wanted in (
        ('unchanged_count', 0),
        ('modified_record_count', 0),
        ('new_record_count', 1),
        ('not_there_anymore_count', 0),
    ):
        self.assertEqual(wanted, getattr(outcome, name), msg=name)

    self.assertTrue(Citizen.objects.filter(pk=incoming.pk).exists())
def test_unchanged_record(self):
    """Mirroring a temp copy of an existing citizen reports one unchanged row."""
    original = CitizenFactory()
    TempCitizenFactory(**model_to_dict(original))

    outcome = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # Only the "unchanged" counter should be non-zero.
    for name, wanted in (
        ('unchanged_count', 1),
        ('modified_record_count', 0),
        ('new_record_count', 0),
        ('not_there_anymore_count', 0),
    ):
        self.assertEqual(wanted, getattr(outcome, name), msg=name)
def test_record_gone_do_not_delete_with_existing(self):
    """A citizen absent from the temp table is counted as gone but is kept.

    Citizens 1 and 3 have matching temp rows; citizen 2 has none.
    """
    first_kept = CitizenFactory(pk=1)
    TempCitizenFactory(pk=1, **model_to_dict(first_kept))
    # No TempCitizen is created for this one.
    orphan = CitizenFactory(pk=2)
    second_kept = CitizenFactory(pk=3)
    TempCitizenFactory(pk=3, **model_to_dict(second_kept))

    result = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # (counter attribute on the stats object, expected value)
    expected_counts = (
        ('unchanged_count', 2),
        ('modified_record_count', 0),
        ('new_record_count', 0),
        ('not_there_anymore_count', 1),
    )
    for counter, expected in expected_counts:
        self.assertEqual(expected, getattr(result, counter), msg=counter)

    # NOT deleted
    still_there = Citizen.objects.filter(pk=orphan.pk)
    self.assertTrue(still_there.exists())
def test_changed_record(self):
    """A differing first_name on the temp row is reported as one modified record.

    The Citizen fetched afterwards is expected to have the temp row's
    first_name.
    """
    original = CitizenFactory()
    incoming = TempCitizenFactory(
        pk=original.pk,
        first_name=original.first_name + '_CHANGED',
    )

    outcome = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # Only the "modified" counter should be non-zero.
    for name, wanted in (
        ('unchanged_count', 0),
        ('modified_record_count', 1),
        ('new_record_count', 0),
        ('not_there_anymore_count', 0),
    ):
        self.assertEqual(wanted, getattr(outcome, name), msg=name)

    updated = Citizen.objects.get(pk=original.pk)
    self.assertEqual(incoming.first_name, updated.first_name)
def test_new_record_with_existing(self):
    """A temp-only row between two mirrored citizens is counted as new.

    Citizens 1 and 3 have matching temp rows; pk 2 exists only in TempCitizen.
    National IDs are consecutive, starting from a fixed base.
    """
    base_nid = 100000000000

    first = CitizenFactory(pk=1, national_id=base_nid)
    TempCitizenFactory(pk=1, **model_to_dict(first))

    incoming = TempCitizenFactory(pk=2, national_id=base_nid + 1)

    third = CitizenFactory(pk=3, national_id=base_nid + 2)
    TempCitizenFactory(pk=3, **model_to_dict(third))

    result = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # (counter attribute on the stats object, expected value)
    expected_counts = (
        ('unchanged_count', 2),
        ('modified_record_count', 0),
        ('new_record_count', 1),
        ('not_there_anymore_count', 0),
    )
    for counter, expected in expected_counts:
        self.assertEqual(expected, getattr(result, counter), msg=counter)

    self.assertTrue(Citizen.objects.filter(pk=incoming.pk).exists())
def test_record_gone_do_not_delete_with_existing(self):
    """With pks 1-3 in Citizen and only 1 and 3 in the temp table, 2 is kept.

    The missing citizen is expected to be counted under
    ``not_there_anymore_count`` while still existing after the mirror.
    """
    missing_pk = 2
    citizens = {}
    # Same creation order as writing the calls out by hand:
    # Citizen 1, Temp 1, Citizen 2, Citizen 3, Temp 3.
    for pk in (1, 2, 3):
        citizens[pk] = CitizenFactory(pk=pk)
        if pk != missing_pk:
            TempCitizenFactory(pk=pk, **model_to_dict(citizens[pk]))

    outcome = mirror_database(from_model=TempCitizen, to_model=Citizen)

    for name, wanted in (
        ('unchanged_count', 2),
        ('modified_record_count', 0),
        ('new_record_count', 0),
        ('not_there_anymore_count', 1),
    ):
        self.assertEqual(wanted, getattr(outcome, name), msg=name)

    # NOT deleted
    self.assertTrue(Citizen.objects.filter(pk=citizens[missing_pk].pk).exists())
def test_new_record_with_existing(self):
    """One new temp row alongside two unchanged ones yields counts 2/0/1/0.

    The counts are unchanged / modified / new / not-there-anymore. The new
    row's pk is expected to exist in Citizen afterwards.
    """
    start_nid = 100000000000
    nid_for_pk = {1: start_nid, 2: start_nid + 1, 3: start_nid + 2}

    already_there = CitizenFactory(pk=1, national_id=nid_for_pk[1])
    TempCitizenFactory(pk=1, **model_to_dict(already_there))

    # Present only on the temp side.
    newcomer = TempCitizenFactory(pk=2, national_id=nid_for_pk[2])

    also_there = CitizenFactory(pk=3, national_id=nid_for_pk[3])
    TempCitizenFactory(pk=3, **model_to_dict(also_there))

    outcome = mirror_database(from_model=TempCitizen, to_model=Citizen)

    for name, wanted in (
        ('unchanged_count', 2),
        ('modified_record_count', 0),
        ('new_record_count', 1),
        ('not_there_anymore_count', 0),
    ):
        self.assertEqual(wanted, getattr(outcome, name), msg=name)

    self.assertTrue(Citizen.objects.filter(pk=newcomer.pk).exists())
def test_changed_record_with_existing(self):
    """A changed row between two unchanged ones is counted as one modification.

    Citizens 1 and 3 get temp copies of their own field values; citizen 2's
    temp row has a different first_name, which the Citizen row is expected
    to carry after mirroring.
    """
    base_nid = 100000000000

    first = CitizenFactory(pk=1, national_id=base_nid)
    TempCitizenFactory(pk=first.pk, **model_to_dict(first))

    # The changed citizen uses the ID two above the base.
    changed_nid = base_nid + 2
    citizen = CitizenFactory(pk=2, national_id=changed_nid)
    altered_name = citizen.first_name + '_CHANGED'
    temp_citizen = TempCitizenFactory(
        pk=citizen.pk,
        national_id=changed_nid,
        first_name=altered_name,
    )

    third = CitizenFactory(pk=3)
    TempCitizenFactory(pk=third.pk, **model_to_dict(third))

    result = mirror_database(from_model=TempCitizen, to_model=Citizen)

    # (counter attribute on the stats object, expected value)
    expected_counts = (
        ('unchanged_count', 2),
        ('modified_record_count', 1),
        ('new_record_count', 0),
        ('not_there_anymore_count', 0),
    )
    for counter, expected in expected_counts:
        self.assertEqual(expected, getattr(result, counter), msg=counter)

    refreshed = Citizen.objects.get(pk=citizen.pk)
    self.assertEqual(temp_citizen.first_name, refreshed.first_name)
def test_changed_record_with_existing(self):
    """One modified temp row plus two unchanged ones yields counts 2/1/0/0.

    The counts are unchanged / modified / new / not-there-anymore. The
    modified citizen is expected to end up with the temp row's first_name.
    """
    start_nid = 100000000000

    untouched_a = CitizenFactory(pk=1, national_id=start_nid)
    TempCitizenFactory(pk=untouched_a.pk, **model_to_dict(untouched_a))

    # This citizen's national ID is start_nid + 2 on both sides.
    target_nid = start_nid + 2
    target = CitizenFactory(pk=2, national_id=target_nid)
    incoming = TempCitizenFactory(
        pk=target.pk,
        national_id=target_nid,
        first_name=target.first_name + '_CHANGED',
    )

    untouched_b = CitizenFactory(pk=3)
    TempCitizenFactory(pk=untouched_b.pk, **model_to_dict(untouched_b))

    outcome = mirror_database(from_model=TempCitizen, to_model=Citizen)

    for name, wanted in (
        ('unchanged_count', 2),
        ('modified_record_count', 1),
        ('new_record_count', 0),
        ('not_there_anymore_count', 0),
    ):
        self.assertEqual(wanted, getattr(outcome, name), msg=name)

    updated = Citizen.objects.get(pk=target.pk)
    self.assertEqual(incoming.first_name, updated.first_name)
def test_empty_dbs(self):
    """With no rows created by the test, every mirror counter should be zero."""
    outcome = mirror_database(from_model=TempCitizen, to_model=Citizen)

    for name in ('unchanged_count', 'modified_record_count',
                 'new_record_count', 'not_there_anymore_count'):
        self.assertEqual(0, getattr(outcome, name), msg=name)
def import_citizen_dump(input_filename,
                        max_change_percent=DEFAULT_MAX_CHANGE_PERCENT,
                        encoding='UTF-8'):
    """
    Load a citizen dump file into TempCitizen and mirror it into Citizen.

    The dump is read into the (freshly emptied) TempCitizen table, then
    mirror_database() syncs TempCitizen into Citizen. If the number of
    modified + new + no-longer-present records exceeds ``max_change_percent``
    of the Citizen count taken before the import, TooManyChanges is raised
    from inside the transaction.

    :param input_filename: path of the dump file to read.
    :param max_change_percent: maximum allowed percentage of changed records,
        relative to the number of Citizen rows present at the start. The
        check is skipped when Citizen starts out empty.
    :param encoding: text encoding used to decode the dump file.
    :return: the stats object returned by mirror_database(), with
        ``records_read`` set to the number of records read from the dump.
    :raises TooManyChanges: when the change percentage exceeds the limit.
    """
    with transaction.atomic():
        # Clear out TempCitizen table. (We clear it at the end too, but this makes
        # extra sure that we start with it empty.)
        delete_all('default', [TempCitizen])

        num_records_at_start = Citizen.objects.count()

        #
        # 1. Fill our temp table with the data from the latest dump
        #
        logger.info("Loading data from dump")
        batch = BatchOperations(TempCitizen)
        records_read = 0
        # Use a context manager so the dump file is closed even if parsing or
        # batching raises part-way through.
        with codecs.open(input_filename, encoding=encoding) as input_file:
            logger.info("Reading %s", input_filename)
            for record in get_records(input_file):
                records_read += 1
                batch.add(record)
        batch.flush()

        #
        # 2. Sync data from temp table to our real table
        #
        logger.info("Updating our own database")
        stats = mirror_database(from_model=TempCitizen, to_model=Citizen)

        # See what % of the records we're changing
        if num_records_at_start > 0:
            num_changes = (stats.modified_record_count
                           + stats.new_record_count
                           + stats.not_there_anymore_count)
            # Multiply by a float first so the result is a true percentage
            # even where "/" on two ints would truncate.
            percent_changed = 100.0 * num_changes / num_records_at_start
            if percent_changed > max_change_percent:
                raise TooManyChanges(
                    "Too many changes, aborting Citizen data import. Max change is %f%% but "
                    "the import would have changed %f%% records (%d/%d).  Use "
                    "--max-change-percent=NN to override this limit if necessary."
                    % (max_change_percent, percent_changed, num_changes, num_records_at_start))

        # Add our data
        stats.records_read = records_read

        # Make a note of when we did it
        timestamp = now()
        CitizenMetadata.objects.update_or_create(defaults=dict(dump_time=timestamp))

        # Flag any records that turned up missing
        if stats.missing_pks:
            Citizen.objects.filter(pk__in=stats.missing_pks,
                                   missing=None).update(missing=timestamp)

    # And we're done!
    # NOTE(review): this cleanup is assumed to sit after the transaction block;
    # confirm against the intended transaction scope.

    # Clear out our temp table (no point in taking up disk space)
    delete_all('default', [TempCitizen])

    return stats