def handle(self, *args, **options):
    """Import contacts and their domiciles from an electoral-register CSV.

    The file named by ``options['filename'][0]`` is read completely, every
    row is passed through ``preprocess_dict`` and the rows are grouped with
    ``groupby_key``.  For each group one ``Domecile`` is fetched or created
    (attached to ``self.ero``); for each row a ``Contact`` is queued for
    creation unless one with the same domicile, ``ero_number`` and ``pd``
    is already stored.  Queued contacts are written with ``bulk_create``
    every ``batch_size`` rows and once more at the end.
    """
    filename = options['filename'][0]
    batch_size = 5000

    with open(filename) as myfile:
        print("Reading electoral data...")
        reader = csv.DictReader(myfile)
        data = [preprocess_dict(x) for x in reader]
    # All rows are in memory now, so the file can be closed before the
    # database work starts.

    # groupby() only merges *adjacent* rows with equal keys; sorting first
    # guarantees exactly one group (and one get_or_create) per domicile.
    data.sort(key=groupby_key)
    print("done - %d records read" % len(data))

    records_done = 0
    temp_list = []
    for grouper, my_group in groupby(data, key=groupby_key):
        my_group = list(my_group)
        # Every row of a group shares the domicile fields; use the first.
        domecile_dict = split_dict(my_group[0], domecile_elements)
        domecile_dict['electoral_registration_office'] = self.ero
        domecile_obj, result = Domecile.objects.get_or_create(
            **domecile_dict)

        for line in my_group:
            contact = split_dict(line, contact_elements)
            contact_obj = Contact.objects.filter(
                domecile=domecile_obj,
                ero_number=contact['ero_number'],
                pd=contact['pd']).first()
            records_done += 1
            if not contact_obj:
                # Not stored yet: queue it for the next bulk insert.
                contact_obj = Contact(**contact)
                contact_obj.domecile = domecile_obj
                temp_list.append(contact_obj)
            if records_done % batch_size == 0:
                print("%d records done - last one %s, %s"
                      % (records_done, contact_obj, domecile_obj))
                Contact.objects.bulk_create(temp_list)
                temp_list = []

    # Write whatever is left over from the last, incomplete batch.
    if temp_list:
        Contact.objects.bulk_create(temp_list)
        print(temp_list)
def handle(self, *args, **options):
    """Import contacts and their domiciles from an electoral-register CSV.

    The file named by ``options['filename'][0]`` is read completely and
    every row is renamed with ``transform_dict(row, rename_dict)``.  Rows
    without a postcode take it from their last non-empty address line
    (``address_7`` down to ``address_1``).  Rows are then sorted and
    grouped with ``groupby_key``; one ``Domecile`` per group is fetched or
    created (attached to ``self.ero``) and a ``Contact`` is queued for
    every row that has no stored match on ``ero_number``, ``pd`` and
    registration office.  Queued contacts are written with ``bulk_create``
    every ``batch_size`` rows and once more at the end.

    Raises:
        DataError: re-raised from ``get_or_create`` after printing the
            offending domicile fields.
    """
    filename = options['filename'][0]
    batch_size = 1000

    with open(filename) as myfile:
        print("Reading electoral data...")
        reader = csv.DictReader(myfile)
        data = [transform_dict(x, rename_dict) for x in reader]
    # All rows are in memory now, so the file can be closed.

    for line in data:
        if line.get('postcode'):
            continue
        # No postcode column value: promote the last non-empty address line.
        for i in range(7, 0, -1):
            index = 'address_' + str(i)
            candidate = line.get(index)  # tolerate files with fewer lines
            if not candidate:
                continue
            # Clear the address line that is being promoted (the previous
            # code wrote to a junk 'address_%d<i>' key instead).
            # NOTE(review): if the address lines are part of the domicile
            # identity, rows imported before this fix may no longer match
            # on re-import - confirm against domecile_elements.
            line[index] = ''
            # 'OTHER ELECTORS' is a marker in the data, not a postcode.
            line['postcode'] = (
                '' if candidate == 'OTHER ELECTORS' else candidate)
            break
        else:
            # Nothing usable found: make sure the key at least exists.
            line.setdefault('postcode', '')

    # groupby() needs its input sorted by the same key.
    data.sort(key=groupby_key)
    print("done - %d records read" % len(data))

    records_done = 0
    temp_list = []
    for grouper, my_group in groupby(data, key=groupby_key):
        my_group = list(my_group)
        # Every row of a group shares the domicile fields; use the first.
        domecile_dict = split_dict(my_group[0], domecile_elements)
        domecile_dict['electoral_registration_office'] = self.ero
        try:
            domecile_obj, result = Domecile.objects.get_or_create(
                **domecile_dict)
        except DataError:
            # Show which values the database rejected, then fail loudly.
            print(domecile_dict)
            raise

        for line in my_group:
            contact_dict = split_dict(line, contact_elements)
            if contact_dict['date_of_attainment']:
                # The source value is split on '/' as day/month/year.
                temp = [int(x) for x in
                        contact_dict['date_of_attainment'].split('/')]
                contact_dict['date_of_attainment'] = date(
                    temp[2], temp[1], temp[0])
            else:
                contact_dict['date_of_attainment'] = None

            contact_obj = Contact.objects.filter(
                ero_number=contact_dict['ero_number'],
                domecile__electoral_registration_office=self.ero,
                pd=contact_dict['pd']).first()
            records_done += 1
            if not contact_obj:
                # Not stored yet: queue it for the next bulk insert.
                contact_obj = Contact(**contact_dict)
                contact_obj.domecile = domecile_obj
                temp_list.append(contact_obj)
            if records_done % batch_size == 0:
                print("%d records done - last one %s, %s"
                      % (records_done, contact_obj, domecile_obj))
                Contact.objects.bulk_create(temp_list)
                temp_list = []

    # Write whatever is left over from the last, incomplete batch.
    if temp_list:
        Contact.objects.bulk_create(temp_list)
        print(temp_list)
def separate_contacts_and_domiciles(line):
    """Return an unsaved ``Contact`` linked to the row's ``Domecile``.

    ``line`` is a mapping whose keys carry a ``'Domecile.'`` or
    ``'Contact.'`` marker.  The domicile fields are used to fetch the
    matching ``Domecile``, creating and saving it when none exists; the
    contact fields are used to build a ``Contact`` that points at it.
    The returned contact is not saved here.
    """
    def fields_for(marker):
        """Collect the entries of ``line`` tagged with ``marker``, untagged."""
        return {
            key.replace(marker, ''): value
            for key, value in line.items()
            if marker in key
        }

    # get_or_create replaces the manual get / DoesNotExist / save sequence
    # and matches how the import commands in this file obtain domiciles.
    matched, _created = Domecile.objects.get_or_create(
        **fields_for('Domecile.'))

    contact = Contact(**fields_for('Contact.'))
    contact.domecile = matched
    return contact
def handle(self, *args, **options):
    """Import contacts and their domiciles from an electoral-register CSV.

    The file named by ``options['filename'][0]`` is read completely, every
    row is renamed with ``transform_dict(row, rename_dict)`` and the rows
    are sorted and grouped with ``groupby_key``.  One ``Domecile`` per
    group is fetched or created (attached to ``self.ero``) and a
    ``Contact`` is queued for every row that has no stored match on
    ``ero_number``, ``pd`` and registration office.

    Queued contacts are bulk-created every ``batch_size`` rows.  A batch
    whose bulk insert fails is kept aside and its contacts are saved one
    by one at the end; contacts that still cannot be saved are printed.
    """
    def bulk_insert(batch):
        """Bulk-create ``batch``; return the objects that were not stored."""
        if not batch:
            return []
        try:
            Contact.objects.bulk_create(batch)
        except Exception:
            # Best effort: one bad row must not abort the whole import,
            # so the batch is retried row by row later.  Unlike a bare
            # ``except:`` this still lets Ctrl-C / SystemExit through.
            return batch
        return []

    filename = options['filename'][0]
    batch_size = 5

    with open(filename) as myfile:
        print("Reading electoral data...")
        reader = csv.DictReader(myfile)
        data = [transform_dict(x, rename_dict) for x in reader]
    # All rows are in memory now, so the file can be closed.

    # groupby() needs its input sorted by the same key.
    data.sort(key=groupby_key)
    print("done - %d records read" % len(data))

    records_done = 0
    temp_list, error_list = [], []
    for grouper, my_group in groupby(data, key=groupby_key):
        my_group = list(my_group)
        # Every row of a group shares the domicile fields; use the first.
        domecile_dict = split_dict(my_group[0], domecile_elements)
        domecile_dict['electoral_registration_office'] = self.ero
        domecile_obj, result = Domecile.objects.get_or_create(
            **domecile_dict)

        for line in my_group:
            contact_dict = split_dict(line, contact_elements)
            if contact_dict['date_of_attainment']:
                # The source value is split on '/' as day/month/year.
                temp = [int(x) for x in
                        contact_dict['date_of_attainment'].split('/')]
                contact_dict['date_of_attainment'] = date(
                    temp[2], temp[1], temp[0])
            else:
                contact_dict['date_of_attainment'] = None

            contact_obj = Contact.objects.filter(
                ero_number=contact_dict['ero_number'],
                domecile__electoral_registration_office=self.ero,
                pd=contact_dict['pd']).first()
            records_done += 1
            if not contact_obj:
                # Not stored yet: queue it for the next bulk insert.
                contact_obj = Contact(**contact_dict)
                contact_obj.domecile = domecile_obj
                temp_list.append(contact_obj)
            if records_done % batch_size == 0:
                try:
                    print("%d records done - last one %s, %s"
                          % (records_done, contact_obj, domecile_obj))
                except Exception:
                    # Progress output is optional; presumably this guards
                    # against names the console cannot encode.
                    pass
                error_list += bulk_insert(temp_list)
                temp_list = []

    # Write whatever is left over from the last, incomplete batch.
    error_list += bulk_insert(temp_list)

    # Retry the contacts of failed batches individually so that only the
    # genuinely bad rows are lost, and report those.
    for contact in error_list:
        try:
            contact.save()
        except Exception:
            try:
                print(contact)
            except Exception:
                pass