def preprocess_dict(my_dict):
    """Clean up one raw record in place and return its renamed form.

    Steps, in order:

    1. ``'Date of Attainment'`` is parsed as ``dd/mm/YYYY`` into a
       ``date``; an empty value becomes ``None``.
    2. ``'Address 5'`` down to ``'Address 1'`` are searched for the first
       value matching ``postcode_re``.  That value is moved to
       ``'postcode'`` and its address field blanked; if nothing matches,
       ``'postcode'`` is set to ``''``.
    3. If the first non-empty address line starts with a digit, its first
       space-separated token goes to ``'Address 2'``, the remainder to
       ``'Address 4'``, ``'Address 3'`` is blanked, the remaining
       non-empty lines are written to ``'Address 5'`` onwards and
       ``'Address 1'`` is blanked.

    Args:
        my_dict: mapping holding at least ``'Date of Attainment'`` and
            ``'Address 1'`` .. ``'Address 6'``.

    Returns:
        The result of ``transform_dict(my_dict, rename_dict)``
        (presumably the same data under renamed keys -- confirm against
        ``transform_dict``).

    Raises:
        KeyError: if one of the expected source columns is missing.
        ValueError: if the date is non-empty but not ``dd/mm/YYYY``.
    """
    raw_date = my_dict['Date of Attainment']
    my_dict['Date of Attainment'] = (
        datetime.strptime(raw_date, '%d/%m/%Y').date() if raw_date else None)

    # Search from the last address line upwards for the postcode.
    for i in range(5, 0, -1):
        address_field = 'Address %d' % i
        if postcode_re.match(my_dict[address_field]):
            my_dict['postcode'] = my_dict[address_field]
            my_dict[address_field] = ''
            break
    else:
        my_dict['postcode'] = ''

    address_pieces = [
        my_dict['Address %d' % x] for x in range(1, 7)
        if my_dict['Address %d' % x]
    ]
    # A record with no address lines at all (or whose only line was the
    # postcode) leaves this list empty; indexing it used to raise
    # IndexError, so check for content first.
    if address_pieces and address_pieces[0][0].isdigit():
        first_pieces = address_pieces[0].split(" ")
        my_dict['Address 2'] = first_pieces[0]
        my_dict['Address 3'] = ''
        my_dict['Address 4'] = " ".join(first_pieces[1:])
        for i, piece in enumerate(address_pieces[1:]):
            my_dict['Address %d' % (i + 5)] = piece
        my_dict['Address 1'] = ''

    return transform_dict(my_dict, rename_dict)
def handle(self, *args, **options):
    """Load each CSV file in ``options['filenames']`` into DataZoneSIMDInfo.

    Every row is renamed with ``transform_dict(line, rename_dict)`` and
    then each value is cleaned:

    * commas are removed;
    * an empty value becomes ``None``;
    * for keys containing ``'rank'`` the value is converted through
      ``float`` and ``int`` and stored back as a string;
    * any other value is replaced by the first match of the module-level
      ``regex`` pattern, or ``None`` when it does not match.

    The row's ``'Data Zone'`` column (stripped) selects the ``DataZone``,
    and the cleaned values are passed as ``defaults`` to
    ``DataZoneSIMDInfo.objects.update_or_create``.

    Raises:
        DataZone.DoesNotExist: (via ``objects.get``) when a row names an
            unknown data zone; this is not caught and aborts the import.
    """
    for filename in options['filenames']:
        with open(filename) as myfile:
            print("Reading electoral data...")
            rows = list(csv.DictReader(myfile))

        for k, line in enumerate(rows):
            new_dict = transform_dict(line, rename_dict)
            if k == 0:
                # Show the renamed column names once per file.
                print(new_dict.keys())

            for key in new_dict:
                value = new_dict[key].replace(",", '')
                if not value:
                    new_dict[key] = None
                elif 'rank' in key:
                    new_dict[key] = str(int(float(value)))
                else:
                    found = regex.search(value)
                    new_dict[key] = found.group(0) if found else None

            datazone = DataZone.objects.get(code=line['Data Zone'].strip())
            try:
                DataZoneSIMDInfo.objects.update_or_create(
                    defaults=new_dict, datazone=datazone)
            except Exception as exc:
                # Best-effort import: report the offending row and why it
                # failed, then carry on.  Catching Exception rather than
                # using a bare ``except`` lets KeyboardInterrupt and
                # SystemExit stop the command.
                print(new_dict)
                print("update_or_create failed: %r" % (exc,))
def preprocess_dict(my_dict):
    """Decode and clean up one raw record, returning its renamed form.

    Steps, in order:

    1. Every value is decoded from ISO-8859-2 (assumes the values are
       byte strings -- confirm against the caller).
    2. ``'Date Of Attainment'`` is parsed as ``dd/mm/YYYY`` into a
       ``date``; an empty value is kept as ``''`` until after the rename.
    3. ``'Address 5'`` down to ``'Address 1'`` are searched for the first
       value matching ``postcode_re``.  That value is moved to
       ``'postcode'`` and its address field blanked; if nothing matches,
       ``'postcode'`` is set to ``''``.
    4. If the first non-empty address line starts with a digit, its first
       space-separated token goes to ``'Address 2'``, the remainder to
       ``'Address 4'``, ``'Address 3'`` is blanked, the remaining
       non-empty lines are written to ``'Address 5'`` onwards and
       ``'Address 1'`` is blanked.
    5. The dict is passed through ``transform_dict(my_dict, rename_dict)``
       and an empty ``'date_of_attainment'`` is replaced by ``None``.

    Args:
        my_dict: mapping holding at least ``'Date Of Attainment'`` and
            ``'Address 1'`` .. ``'Address 6'``.

    Returns:
        The transformed dict.

    Raises:
        KeyError: if one of the expected columns is missing.
        ValueError: if the date is non-empty but not ``dd/mm/YYYY``.
    """
    my_dict = {x: y.decode('iso8859_2') for x, y in my_dict.items()}

    raw_date = my_dict['Date Of Attainment']
    # The empty placeholder stays '' here and only becomes None after
    # transform_dict (presumably transform_dict cannot handle None --
    # TODO confirm).
    my_dict['Date Of Attainment'] = (
        datetime.strptime(raw_date, '%d/%m/%Y').date() if raw_date else '')

    # Search from the last address line upwards for the postcode.
    for i in range(5, 0, -1):
        address_field = 'Address %d' % i
        if postcode_re.match(my_dict[address_field]):
            my_dict['postcode'] = my_dict[address_field]
            my_dict[address_field] = ''
            break
    else:
        my_dict['postcode'] = ''

    address_pieces = [
        my_dict['Address %d' % x] for x in range(1, 7)
        if my_dict['Address %d' % x]
    ]
    # A record with no address lines at all (or whose only line was the
    # postcode) leaves this list empty; indexing it used to raise
    # IndexError, so check for content first.
    if address_pieces and address_pieces[0][0].isdigit():
        first_pieces = address_pieces[0].split(" ")
        my_dict['Address 2'] = first_pieces[0]
        my_dict['Address 3'] = ''
        my_dict['Address 4'] = " ".join(first_pieces[1:])
        for i, piece in enumerate(address_pieces[1:]):
            my_dict['Address %d' % (i + 5)] = piece
        my_dict['Address 1'] = ''

    my_dict = transform_dict(my_dict, rename_dict)
    if my_dict['date_of_attainment'] == '':
        my_dict['date_of_attainment'] = None
    return my_dict
def handle(self, *args, **options):
    """Import contacts and their domeciles from ``options['filename'][0]``.

    The CSV rows are renamed with ``transform_dict(row, rename_dict)``.
    For rows without a postcode, the last non-empty ``address_7`` ..
    ``address_1`` field is moved into ``'postcode'`` (the literal value
    ``'OTHER ELECTORS'`` is treated as no postcode).  Rows are then sorted
    and grouped by ``groupby_key``; each group gets one ``Domecile``
    (``get_or_create`` on the ``domecile_elements`` fields of the group's
    first row plus ``self.ero``), and every row that has no existing
    ``Contact`` with the same ``ero_number``, ``pd`` and registration
    office is queued and bulk-created in batches.

    Raises:
        DataError: re-raised from ``Domecile.objects.get_or_create`` after
            printing the offending field values.
        KeyError: if a row needing a postcode lacks an ``address_N`` key.
    """
    filename = options['filename'][0]
    with open(filename) as myfile:
        print("Reading electoral data...")
        reader = csv.DictReader(myfile)
        data = [transform_dict(x, rename_dict) for x in reader]

    for line in data:
        if line.get('postcode'):
            continue
        # Take the postcode from the last populated address line.
        for i in range(7, 0, -1):
            index = 'address_%d' % i
            if not line[index]:
                continue
            postcode = line[index]
            # Clear the field the postcode came from.  The previous code
            # wrote to the literal key 'address_%d<i>' (the format was
            # never applied), so the real address field was left intact.
            line[index] = ''
            line['postcode'] = '' if postcode == 'OTHER ELECTORS' else postcode
            break

    # groupby only merges adjacent items, so sort by the same key first.
    data.sort(key=groupby_key)
    print("done - %d records read" % len(data))

    records_done = 0
    temp_list = []
    for _key, my_group in groupby(data, key=groupby_key):
        my_group = list(my_group)
        domecile_dict = split_dict(my_group[0], domecile_elements)
        domecile_dict['electoral_registration_office'] = self.ero
        try:
            domecile_obj, _created = Domecile.objects.get_or_create(
                **domecile_dict)
        except DataError:
            # Show which values the database rejected before failing.
            print(domecile_dict)
            raise

        for line in my_group:
            contact_dict = split_dict(line, contact_elements)
            if contact_dict['date_of_attainment']:
                # Source format is day/month/year.
                day, month, year = [
                    int(x)
                    for x in contact_dict['date_of_attainment'].split('/')
                ][:3]
                contact_dict['date_of_attainment'] = date(year, month, day)
            else:
                contact_dict['date_of_attainment'] = None

            contact_obj = Contact.objects.filter(
                ero_number=contact_dict['ero_number'],
                domecile__electoral_registration_office=self.ero,
                pd=contact_dict['pd']).first()
            records_done += 1
            if not contact_obj:
                contact_obj = Contact(**contact_dict)
                contact_obj.domecile = domecile_obj
                temp_list.append(contact_obj)

            if records_done % 1000 == 0:
                print("%d records done - last one %s, %s"
                      % (records_done, contact_obj, domecile_obj))
                Contact.objects.bulk_create(temp_list)
                temp_list = []

    # Flush whatever is left after the last full batch.
    if temp_list:
        Contact.objects.bulk_create(temp_list)
        print(temp_list)
def preprocess_dict(my_dict):
    """Tidy the address, name and date columns of one record, then rename.

    * When ``'STREETADD1'`` begins with a numeric token, ``'SUBSTREETNAME'``
      is set, ``'HOUSENUMBER'`` is empty, and the last comma-separated part
      of ``'SUBSTREETNAME'`` is that same number, the number is moved to
      ``'HOUSENUMBER'``, dropped from ``'SUBSTREETNAME'``, and
      ``'STREETADD2'`` is shifted up into ``'STREETADD1'``.
    * An empty ``'FLAT'`` and then an empty ``'HOUSENUMBER'`` are each
      filled from a non-empty ``'HOUSENAME'``.
    * Every ``'(z) '`` is removed from ``'SURNAME'``.
    * ``'DO18'`` is parsed as ``dd/mm/YYYY`` into a ``date``, or set to
      ``None`` when empty.

    Returns the result of ``transform_dict(my_dict, rename_dict)``.
    """
    street_tokens = my_dict['STREETADD1'].split()
    leads_with_number = bool(street_tokens) and street_tokens[0].isnumeric()
    if (leads_with_number
            and my_dict['SUBSTREETNAME']
            and not my_dict['HOUSENUMBER']):
        sub_parts = [
            part.strip() for part in my_dict['SUBSTREETNAME'].split(',')
        ]
        trailing = sub_parts[-1]
        if trailing.isnumeric() and trailing == street_tokens[0]:
            my_dict['HOUSENUMBER'] = trailing
            my_dict['SUBSTREETNAME'] = ", ".join(sub_parts[:-1])
            my_dict['STREETADD1'] = my_dict['STREETADD2']
            my_dict['STREETADD2'] = ''

    # Fall back to the house name for whichever of these is still empty.
    for target in ('FLAT', 'HOUSENUMBER'):
        if not my_dict[target] and my_dict['HOUSENAME']:
            my_dict[target] = my_dict['HOUSENAME']

    surname = my_dict['SURNAME']
    my_dict['SURNAME'] = surname.replace('(z) ', '')

    if my_dict['DO18']:
        my_dict['DO18'] = datetime.strptime(
            my_dict['DO18'], '%d/%m/%Y').date()
    else:
        my_dict['DO18'] = None

    return transform_dict(my_dict, rename_dict)
def handle(self, *args, **options):
    """Import contacts and their domeciles from ``options['filename'][0]``.

    The CSV rows are renamed with ``transform_dict(row, rename_dict)``,
    sorted and grouped by ``groupby_key``.  Each group gets one
    ``Domecile`` (``get_or_create`` on the ``domecile_elements`` fields of
    the group's first row plus ``self.ero``).  Every row that has no
    existing ``Contact`` with the same ``ero_number``, ``pd`` and
    registration office is queued and bulk-created every 5 records.

    The import is best-effort: a batch that fails to bulk-create is
    retried one object at a time at the end, and objects that still fail
    to save are printed rather than aborting the run.
    """
    filename = options['filename'][0]
    with open(filename) as myfile:
        print("Reading electoral data...")
        reader = csv.DictReader(myfile)
        data = [transform_dict(x, rename_dict) for x in reader]

    # groupby only merges adjacent items, so sort by the same key first.
    data.sort(key=groupby_key)
    print("done - %d records read" % len(data))

    records_done = 0
    temp_list, error_list = [], []

    def flush(batch):
        """Bulk-insert ``batch``; on failure queue it for one-by-one saves."""
        try:
            Contact.objects.bulk_create(batch)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the run.
            error_list.extend(batch)

    for _key, my_group in groupby(data, key=groupby_key):
        my_group = list(my_group)
        domecile_dict = split_dict(my_group[0], domecile_elements)
        domecile_dict['electoral_registration_office'] = self.ero
        domecile_obj, _created = Domecile.objects.get_or_create(
            **domecile_dict)

        for line in my_group:
            contact_dict = split_dict(line, contact_elements)
            if contact_dict['date_of_attainment']:
                # Source format is day/month/year.
                day, month, year = [
                    int(x)
                    for x in contact_dict['date_of_attainment'].split('/')
                ][:3]
                contact_dict['date_of_attainment'] = date(year, month, day)
            else:
                contact_dict['date_of_attainment'] = None

            contact_obj = Contact.objects.filter(
                ero_number=contact_dict['ero_number'],
                domecile__electoral_registration_office=self.ero,
                pd=contact_dict['pd']).first()
            records_done += 1
            if not contact_obj:
                contact_obj = Contact(**contact_dict)
                contact_obj.domecile = domecile_obj
                temp_list.append(contact_obj)

            if records_done % 5 == 0:
                try:
                    print("%d records done - last one %s, %s"
                          % (records_done, contact_obj, domecile_obj))
                except Exception:
                    # Progress output is optional; never let formatting or
                    # console-encoding problems interrupt the import.
                    pass
                flush(temp_list)
                temp_list = []

    # Flush whatever is left after the last full batch.
    if temp_list:
        flush(temp_list)

    # Retry objects from failed batches individually so one bad record
    # does not take its whole batch down with it.
    for contact in error_list:
        try:
            contact.save()
        except Exception:
            try:
                print(contact)
            except Exception:
                pass