def preprocess_dict(my_dict):
    """Normalise one electoral-register row, then rename its fields.

    The row is modified in place before being handed to ``transform_dict``:

    * ``'Date of Attainment'`` is parsed from ``dd/mm/YYYY`` text into a
      ``date``; an empty value becomes ``None``.
    * ``'Address 5'`` down to ``'Address 1'`` are scanned for the last field
      matching ``postcode_re``; that value is moved into ``'postcode'``
      (``''`` when none matches).
    * When the first non-empty address piece starts with a digit, its first
      space-separated token is stored in ``'Address 2'``, the remainder in
      ``'Address 4'``, and the following pieces are written to
      ``'Address 5'`` onwards.

    Args:
        my_dict: mapping holding ``'Date of Attainment'`` and
            ``'Address 1'`` .. ``'Address 6'`` as strings.

    Returns:
        The result of ``transform_dict(my_dict, rename_dict)``.
    """
    attainment = my_dict['Date of Attainment']
    my_dict['Date of Attainment'] = (
        datetime.strptime(attainment, '%d/%m/%Y').date()
        if attainment else None)

    # Search from the end of the address: the postcode is expected last.
    for i in range(5, 0, -1):
        address_field = 'Address %d' % i
        if postcode_re.match(my_dict[address_field]):
            my_dict['postcode'] = my_dict[address_field]
            my_dict[address_field] = ''
            break
    else:
        my_dict['postcode'] = ''

    address_pieces = [
        my_dict["Address %d" % x] for x in range(1, 7)
        if my_dict["Address %d" % x]
    ]
    # A row may have no address left at all (e.g. only a postcode); guard
    # against indexing into an empty list.
    if address_pieces and address_pieces[0][0].isdigit():
        first_pieces = address_pieces[0].split(" ")
        my_dict['Address 2'] = first_pieces[0]
        my_dict['Address 3'] = ''
        my_dict['Address 4'] = " ".join(first_pieces[1:])
        # Every non-empty piece is already held in address_pieces, so the
        # tail slots can be cleared safely; otherwise a piece that moves
        # down from 'Address 6' would also remain in its old slot.
        my_dict['Address 5'] = ''
        my_dict['Address 6'] = ''
        for i, piece in enumerate(address_pieces[1:]):
            my_dict['Address %d' % (i + 5)] = piece
        my_dict['Address 1'] = ''
    my_dict = transform_dict(my_dict, rename_dict)
    return my_dict
예제 #2
0
    def handle(self, *args, **options):
        """Read CSV files and store one ``DataZoneSIMDInfo`` per row.

        Each path in ``options['filenames']`` is read with
        ``csv.DictReader``. Every row is renamed through ``transform_dict``
        and its values are cleaned before being written with
        ``DataZoneSIMDInfo.objects.update_or_create``.

        Value cleaning, per field:

        * commas are removed;
        * empty (or missing) values become ``None``;
        * fields whose name contains ``'rank'`` are normalised to an integer
          string (``"12.0"`` -> ``"12"``);
        * any other value is reduced to the first ``regex`` match, or
          ``None`` when nothing matches.

        A failure while saving a row is reported and the import continues.
        Exceptions from the ``DataZone`` lookup are not caught here and
        propagate to the caller.
        """
        for filename in options['filenames']:
            with open(filename) as myfile:
                print("Reading electoral data...")
                rows = list(csv.DictReader(myfile))

            for k, line in enumerate(rows):
                new_dict = transform_dict(line, rename_dict)
                if k == 0:
                    print(new_dict.keys())
                # Only values of existing keys are reassigned, so iterating
                # the dict while updating it is safe.
                for key in new_dict:
                    # DictReader yields None for cells missing from short
                    # rows; treat those like empty strings.
                    value = (new_dict[key] or '').replace(",", '')
                    if not value:
                        new_dict[key] = None
                    elif 'rank' in key:
                        new_dict[key] = str(int(float(value)))
                    else:
                        found = regex.search(value)
                        new_dict[key] = found.group(0) if found else None

                datazone = DataZone.objects.get(code=line['Data Zone'].strip())
                try:
                    DataZoneSIMDInfo.objects.update_or_create(
                        defaults=new_dict, datazone=datazone)
                except Exception as exc:
                    # Best effort: keep importing the remaining rows, but
                    # show which row failed and why.
                    print(new_dict)
                    print("update_or_create failed: %r" % (exc,))
예제 #3
0
def preprocess_dict(my_dict):
    """Decode and normalise one electoral-register row, then rename fields.

    Works on a decoded copy of ``my_dict``; the caller's mapping is left
    untouched.

    * Every value is decoded from ISO 8859-2.
      NOTE(review): this assumes all values are byte strings - confirm
      against the caller.
    * ``'Date Of Attainment'`` is parsed from ``dd/mm/YYYY`` text into a
      ``date``; an empty value ends up as ``None`` in the returned mapping.
    * ``'Address 5'`` down to ``'Address 1'`` are scanned for the last field
      matching ``postcode_re``; that value is moved into ``'postcode'``
      (``''`` when none matches).
    * When the first non-empty address piece starts with a digit, its first
      space-separated token is stored in ``'Address 2'``, the remainder in
      ``'Address 4'``, and the following pieces are written to
      ``'Address 5'`` onwards.

    Returns:
        The result of ``transform_dict`` applied to the normalised row, with
        an empty ``'date_of_attainment'`` replaced by ``None``.
    """
    my_dict = {x: y.decode('iso8859_2') for x, y in my_dict.items()}
    attainment = my_dict['Date Of Attainment']
    # Kept as '' (not None) until after transform_dict; converted below.
    my_dict['Date Of Attainment'] = (
        datetime.strptime(attainment, '%d/%m/%Y').date()
        if attainment else '')

    # Search from the end of the address: the postcode is expected last.
    for i in range(5, 0, -1):
        address_field = 'Address %d' % i
        if postcode_re.match(my_dict[address_field]):
            my_dict['postcode'] = my_dict[address_field]
            my_dict[address_field] = ''
            break
    else:
        my_dict['postcode'] = ''

    address_pieces = [
        my_dict["Address %d" % x] for x in range(1, 7)
        if my_dict["Address %d" % x]
    ]
    # A row may have no address left at all (e.g. only a postcode); guard
    # against indexing into an empty list.
    if address_pieces and address_pieces[0][0].isdigit():
        first_pieces = address_pieces[0].split(" ")
        my_dict['Address 2'] = first_pieces[0]
        my_dict['Address 3'] = ''
        my_dict['Address 4'] = " ".join(first_pieces[1:])
        # Every non-empty piece is already held in address_pieces, so the
        # tail slots can be cleared safely; otherwise a piece that moves
        # down from 'Address 6' would also remain in its old slot.
        my_dict['Address 5'] = ''
        my_dict['Address 6'] = ''
        for i, piece in enumerate(address_pieces[1:]):
            my_dict['Address %d' % (i + 5)] = piece
        my_dict['Address 1'] = ''
    my_dict = transform_dict(my_dict, rename_dict)
    if my_dict['date_of_attainment'] == '':
        my_dict['date_of_attainment'] = None
    return my_dict
예제 #4
0
    def handle(self, *args, **options):
        """Import electoral-register rows from a CSV file.

        Reads ``options['filename'][0]`` with ``csv.DictReader``, renames
        each row through ``transform_dict`` and, for rows without a
        postcode, promotes the last non-empty ``address_N`` field
        (N = 7..1) to ``postcode``. The placeholder ``'OTHER ELECTORS'`` is
        treated as "no postcode".

        Rows are then sorted and grouped with ``groupby_key``. One
        ``Domecile`` is fetched or created per group (tied to ``self.ero``),
        and a ``Contact`` is queued for every row that has no existing
        match on ``ero_number``/``pd`` within ``self.ero``. Queued contacts
        are written with ``bulk_create`` in batches.

        Raises:
            DataError: re-raised after printing the offending domecile
                data when ``Domecile.objects.get_or_create`` fails.
        """
        batch_size = 1000

        filename = options['filename'][0]
        with open(filename) as myfile:
            print("Reading electoral data...")
            reader = csv.DictReader(myfile)
            data = [transform_dict(x, rename_dict) for x in reader]

        for line in data:
            if 'postcode' not in line or not line['postcode']:
                for i in range(7, 0, -1):
                    index = 'address_' + str(i)
                    if line[index]:
                        line['postcode'] = line[index]
                        # Clear the field that was promoted to postcode.
                        line[index] = ''
                        if line['postcode'] == 'OTHER ELECTORS':
                            line['postcode'] = ''
                        break
                else:
                    # No address field was filled: make sure the key exists.
                    line['postcode'] = ''
        # groupby only merges adjacent items, so sort by the same key first.
        data.sort(key=groupby_key)
        print("done - %d records read" % len(data))

        records_done = 0
        temp_list = []
        for grouper, my_group in groupby(data, key=groupby_key):
            my_group = list(my_group)
            domecile_dict = split_dict(my_group[0], domecile_elements)
            domecile_dict['electoral_registration_office'] = self.ero
            try:
                domecile_obj, _created = Domecile.objects.get_or_create(
                    **domecile_dict)
            except DataError:
                print(domecile_dict)
                raise
            for line in my_group:
                contact_dict = split_dict(line, contact_elements)
                if contact_dict['date_of_attainment']:
                    # Stored as dd/mm/YYYY text.
                    temp = [int(x) for x in
                            contact_dict['date_of_attainment'].split('/')]
                    contact_dict['date_of_attainment'] = date(
                        temp[2], temp[1], temp[0])
                else:
                    contact_dict['date_of_attainment'] = None
                contact_obj = Contact.objects.filter(
                    ero_number=contact_dict['ero_number'],
                    domecile__electoral_registration_office=self.ero,
                    pd=contact_dict['pd']).first()
                records_done += 1
                if not contact_obj:
                    contact_obj = Contact(**contact_dict)
                    contact_obj.domecile = domecile_obj
                    temp_list.append(contact_obj)
                    # Flush on queue length so the pending list stays
                    # bounded no matter how many rows already existed.
                    if len(temp_list) >= batch_size:
                        print("%d records done - last one %s, %s" %
                              (records_done, contact_obj, domecile_obj))
                        Contact.objects.bulk_create(temp_list)
                        temp_list = []
        if temp_list:
            Contact.objects.bulk_create(temp_list)
            print(temp_list)
def preprocess_dict(my_dict):
    """Tidy the address, name and date fields of a row, then rename them.

    The row is modified in place:

    * If ``STREETADD1`` starts with a number, ``SUBSTREETNAME`` is set and
      ``HOUSENUMBER`` is empty, and the last comma-separated part of
      ``SUBSTREETNAME`` is that same number, the number is moved into
      ``HOUSENUMBER`` and ``STREETADD2`` is shifted up into ``STREETADD1``.
    * ``HOUSENAME`` fills in ``FLAT`` and ``HOUSENUMBER`` when they are empty.
    * The ``'(z) '`` marker is stripped from ``SURNAME``.
    * ``DO18`` is parsed from ``dd/mm/YYYY`` text into a ``date``; an empty
      value becomes ``None``.

    Returns:
        The result of ``transform_dict(my_dict, rename_dict)``.
    """
    street_tokens = my_dict['STREETADD1'].split()
    # An empty string is never numeric, so this also covers "no tokens".
    leading_token = street_tokens[0] if street_tokens else ''
    if (leading_token.isnumeric()
            and my_dict['SUBSTREETNAME']
            and not my_dict['HOUSENUMBER']):
        sub_parts = [
            part.strip() for part in my_dict['SUBSTREETNAME'].split(',')
        ]
        trailing_part = sub_parts[-1]
        if trailing_part.isnumeric() and trailing_part == leading_token:
            my_dict['HOUSENUMBER'] = trailing_part
            my_dict['SUBSTREETNAME'] = ", ".join(sub_parts[:-1])
            my_dict['STREETADD1'] = my_dict['STREETADD2']
            my_dict['STREETADD2'] = ''

    # The house name stands in for whichever of these is still empty.
    for target in ('FLAT', 'HOUSENUMBER'):
        if not my_dict[target] and my_dict['HOUSENAME']:
            my_dict[target] = my_dict['HOUSENAME']

    my_dict['SURNAME'] = my_dict['SURNAME'].replace('(z) ', '')

    do18_text = my_dict['DO18']
    if do18_text:
        my_dict['DO18'] = datetime.strptime(do18_text, '%d/%m/%Y').date()
    else:
        my_dict['DO18'] = None

    return transform_dict(my_dict, rename_dict)
예제 #6
0
    def handle(self, *args, **options):
        """Import electoral-register rows from a CSV file, tolerating errors.

        Reads ``options['filename'][0]`` with ``csv.DictReader``, renames
        each row through ``transform_dict``, then sorts and groups the rows
        with ``groupby_key``. One ``Domecile`` is fetched or created per
        group (tied to ``self.ero``), and a ``Contact`` is queued for every
        row that has no existing match on ``ero_number``/``pd`` within
        ``self.ero``.

        Queued contacts are written with ``bulk_create`` in small batches.
        Contacts from a batch that fails are retried one by one with
        ``save()`` at the end; those that still fail are printed together
        with the error.
        """
        batch_size = 5

        filename = options['filename'][0]
        with open(filename) as myfile:
            print("Reading electoral data...")
            reader = csv.DictReader(myfile)
            data = [transform_dict(x, rename_dict) for x in reader]
        # groupby only merges adjacent items, so sort by the same key first.
        data.sort(key=groupby_key)
        print("done - %d records read" % len(data))

        records_done = 0
        temp_list, error_list = [], []
        for grouper, my_group in groupby(data, key=groupby_key):
            my_group = list(my_group)
            domecile_dict = split_dict(my_group[0], domecile_elements)
            domecile_dict['electoral_registration_office'] = self.ero
            domecile_obj, _created = Domecile.objects.get_or_create(
                **domecile_dict)
            for line in my_group:
                contact_dict = split_dict(line, contact_elements)
                if contact_dict['date_of_attainment']:
                    # Stored as dd/mm/YYYY text.
                    temp = [
                        int(x)
                        for x in contact_dict['date_of_attainment'].split('/')
                    ]
                    contact_dict['date_of_attainment'] = date(
                        temp[2], temp[1], temp[0])
                else:
                    contact_dict['date_of_attainment'] = None
                contact_obj = Contact.objects.filter(
                    ero_number=contact_dict['ero_number'],
                    domecile__electoral_registration_office=self.ero,
                    pd=contact_dict['pd']).first()
                records_done += 1
                if not contact_obj:
                    contact_obj = Contact(**contact_dict)
                    contact_obj.domecile = domecile_obj
                    temp_list.append(contact_obj)
                    # Flush on queue length so a failing batch stays small
                    # no matter how many rows already existed.
                    if len(temp_list) >= batch_size:
                        try:
                            print("%d records done - last one %s, %s" %
                                  (records_done, contact_obj, domecile_obj))
                        except Exception:
                            # Progress output is best effort (e.g. text
                            # that cannot be encoded for the console).
                            pass
                        try:
                            Contact.objects.bulk_create(temp_list)
                        except Exception:
                            # Retried individually after the main loop.
                            error_list += temp_list
                        temp_list = []
        if temp_list:
            try:
                Contact.objects.bulk_create(temp_list)
            except Exception:
                error_list += temp_list

        # Retry one at a time so a single bad record does not discard the
        # rest of its batch.
        for failed in error_list:
            try:
                failed.save()
            except Exception as exc:
                try:
                    print(failed)
                    print("save failed: %r" % (exc,))
                except Exception:
                    pass