def test_ignore_non_VALUES_lines(self):
    # Only the first and third lines below are expected to come back from
    # get_records; the middle 'other' line should be skipped.
    lines = ['VALUES(1', 'other', 'VALUES(2']
    expected = [lines[0], lines[2]]
    records = get_records(lines, line_parser=mock_line_to_dictionary)
    self.assertEqual(expected, list(records))
def test_plain(self):
    # With the stub line parser swapped in, get_records should hand back
    # exactly the lines it was given.
    records = get_records(self.input, line_parser=mock_line_to_dictionary)
    self.assertEqual(self.input, list(records))
def import_citizen_dump(input_filename,
                        max_change_percent=DEFAULT_MAX_CHANGE_PERCENT,
                        encoding='UTF-8'):
    """Load a citizen dump file and mirror its contents into the Citizen table.

    The dump is first loaded into TempCitizen, then mirror_database() syncs
    TempCitizen into Citizen. Everything happens inside a single
    transaction.atomic() block.

    :param input_filename: path of the dump file to read.
    :param max_change_percent: maximum percentage of the pre-existing Citizen
        records that may be modified, added or found missing by this import.
        The check is skipped when the Citizen table starts out empty.
    :param encoding: text encoding used to decode the dump file.
    :returns: the stats object returned by mirror_database(), with an extra
        ``records_read`` attribute holding the number of records read from
        the dump.
    :raises TooManyChanges: if the import would change more than
        ``max_change_percent`` percent of the records. The exception is
        raised inside the atomic block, so (per Django's transaction.atomic
        semantics) the work done so far is rolled back.
    """
    with transaction.atomic():

        # Clear out TempCitizen table. (We clear it at the end too, but this
        # makes extra sure that we start with it empty.)
        delete_all('default', [TempCitizen])

        num_records_at_start = Citizen.objects.count()

        #
        # 1. Fill our temp table with the data from the latest dump
        #
        logger.info("Loading data from dump")
        batch = BatchOperations(TempCitizen)
        records_read = 0
        # Use a context manager so the dump file is closed even if parsing
        # or a batch insert raises.
        with codecs.open(input_filename, encoding=encoding) as input_file:
            logger.info("Reading %s", input_filename)
            for record in get_records(input_file):
                records_read += 1
                batch.add(record)
        batch.flush()

        #
        # 2. Sync data from temp table to our real table
        #
        logger.info("Updating our own database")
        stats = mirror_database(from_model=TempCitizen, to_model=Citizen)

        # See what % of the records we're changing
        if num_records_at_start > 0:
            num_changes = (stats.modified_record_count
                           + stats.new_record_count
                           + stats.not_there_anymore_count)
            # Multiply by a float *before* dividing: this avoids integer
            # truncation on interpreters where int / int floors, and keeps
            # exact ratios (e.g. 7 / 100) from picking up rounding error.
            percent_changed = 100.0 * num_changes / num_records_at_start
            if percent_changed > max_change_percent:
                raise TooManyChanges(
                    "Too many changes, aborting Citizen data import. Max change is %f%% but "
                    "the import would have changed %f%% records (%d/%d).  Use "
                    "--max-change-percent=NN to override this limit if necessary."
                    % (max_change_percent, percent_changed, num_changes,
                       num_records_at_start))

        # Add our data
        stats.records_read = records_read

        # Make a note of when we did it
        timestamp = now()
        CitizenMetadata.objects.update_or_create(defaults=dict(dump_time=timestamp))

        # Flag any records that turned up missing (only those not already
        # flagged, so an earlier "missing" timestamp is preserved)
        if stats.missing_pks:
            Citizen.objects.filter(pk__in=stats.missing_pks,
                                   missing=None).update(missing=timestamp)

        # And we're done!

        # Clear out our temp table (no point in taking up disk space)
        delete_all('default', [TempCitizen])

    return stats
def test_plain(self):
    # The stand-in parser is expected to pass each line through untouched,
    # so the collected output should equal the input.
    parsed = get_records(self.input, line_parser=mock_line_to_dictionary)
    collected = list(parsed)
    self.assertEqual(self.input, collected)