def post_import(self):
        """Insert the held-back stations, then repair invalid district polygons.

        Each entry of ``self.missing_stations`` is indexed positionally:
        ``record[1]`` becomes the station's ``internal_council_id`` and
        ``record[2]`` is passed to ``self.split_address()`` to obtain the
        address and postcode. The stations are added with no location.

        Afterwards an UPDATE is run against the ``PollingDistrict`` table that
        rewrites every row whose ``area`` fails ``ST_IsValid``.
        """
        # iterate self.missing_stations + insert
        # points are missing and we have no postcodes to geocode
        self.stations = StationSet()
        for record in self.missing_stations:
            address_parts = self.split_address(record[2])
            self.add_polling_station({
                'internal_council_id': record[1],
                'postcode': address_parts['postcode'],
                'address': address_parts['address'],
                'location': None,
                'council': self.council
            })
        self.stations.save()

        # This data isn't great - the polygons seem to be corrupt in some way.
        # PostGIS can fix them though!
        print("running fixup SQL")

        # _meta is available on the model class; no instance is needed.
        table_name = PollingDistrict._meta.db_table

        # The table name is taken from the model's metadata (not user input),
        # so formatting it into the statement is safe here.
        fixup_sql = """
        UPDATE {0}
         SET area=ST_Multi(ST_CollectionExtract(ST_MakeValid(area), 3))
         WHERE NOT ST_IsValid(area);
        """.format(table_name)

        # Use the cursor as a context manager so it is always closed.
        with connection.cursor() as cursor:
            cursor.execute(fixup_sql)
class BaseGenericApiImporter(BaseStationsDistrictsImporter):
    """Importer whose districts and/or stations are downloaded from URLs.

    Subclasses set ``districts_url`` and/or ``stations_url``; a source whose
    URL is left as ``None`` is skipped by ``import_data()``.
    """

    srid = 4326
    districts_srid = 4326

    districts_name = None
    districts_url = None

    stations_name = None
    stations_url = None

    local_files = False

    def import_data(self):
        """Import whichever of districts/stations has a URL, then save both sets."""
        self.districts = DistrictSet()
        self.stations = StationSet()

        # deal with 'stations only' or 'districts only' data
        if self.districts_url is not None:
            self.import_polling_districts()
        if self.stations_url is not None:
            self.import_polling_stations()

        self.districts.save()
        self.stations.save()

    def get_districts(self):
        """Download ``districts_url`` to a temporary file and parse it.

        Returns whatever ``self.get_data()`` returns for
        ``self.districts_filetype`` and the temporary file's path.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            # urlretrieve()'s return value is not needed, only the file on disk
            urllib.request.urlretrieve(self.districts_url, tmp.name)
            return self.get_data(self.districts_filetype, tmp.name)

    def get_stations(self):
        """Download ``stations_url`` to a temporary file and parse it.

        Returns whatever ``self.get_data()`` returns for
        ``self.stations_filetype`` and the temporary file's path.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            # urlretrieve()'s return value is not needed, only the file on disk
            urllib.request.urlretrieve(self.stations_url, tmp.name)
            return self.get_data(self.stations_filetype, tmp.name)
class BaseStationsAddressesImporter(BaseStationsImporter,
                                    BaseAddressesImporter):
    """Importer base that combines a stations import with an addresses import."""

    def import_data(self):
        """Import residential addresses and polling stations, then save both.

        Fresh ``StationSet`` and ``AddressSet`` collections are bound to
        ``self.stations`` and ``self.addresses`` before the import steps run.
        """
        self.stations = StationSet()
        # the address collection is constructed with this importer's logger
        self.addresses = AddressSet(self.logger)
        self.import_residential_addresses()
        self.import_polling_stations()
        # addresses are saved with self.batch_size; stations take no argument
        self.addresses.save(self.batch_size)
        self.stations.save()
class BaseStationsDistrictsImporter(BaseStationsImporter,
                                    BaseDistrictsImporter):
    """Importer base that combines a stations import with a districts import."""

    def import_data(self):
        """Import polling districts, then polling stations, and save both.

        Fresh ``StationSet`` and ``DistrictSet`` collections are bound to
        ``self.stations`` and ``self.districts`` before the import steps run.
        """
        self.stations = StationSet()
        self.districts = DistrictSet()
        # districts are imported (and saved) before stations
        self.import_polling_districts()
        self.import_polling_stations()
        self.districts.save()
        self.stations.save()
 def post_import(self):
     """Add a location-less station for every address still in ``station_addresses``.

     Whatever remains in ``self.station_addresses`` at this point is a district
     code that has a station address attached but no point, so it is inserted
     with ``location`` set to ``None`` and an empty postcode.
     """
     self.stations = StationSet()
     for code, address in self.station_addresses.items():
         leftover_station = {
             "internal_council_id": code,
             "postcode": "",
             "address": address,
             "location": None,
             "council": self.council,
         }
         self.add_polling_station(leftover_station)
     self.stations.save()
 def post_import(self):
     """Mop up district codes that have a station address but no point.

     Every code left in ``self.station_addresses`` is added as a polling
     station with no location and an empty postcode, then the set is saved.
     """
     remaining = self.station_addresses
     self.stations = StationSet()
     for code in remaining:
         station = {
             'internal_council_id': code,
             'postcode': '',
             'address': remaining[code],
             'location': None,
             'council': self.council
         }
         self.add_polling_station(station)
     self.stations.save()
    def import_data(self):
        """Run the optional pre-import hook, then import addresses and stations.

        ``self.pre_import()`` is treated as optional: a ``NotImplementedError``
        raised by it is ignored. Addresses are saved with ``self.batch_size``.
        """

        # Optional step for pre import tasks
        try:
            self.pre_import()
        except NotImplementedError:
            # no pre-import step was provided; carry on with the import
            pass

        self.stations = StationSet()
        self.addresses = AddressSet(self.logger)
        self.import_residential_addresses()
        self.import_polling_stations()
        self.addresses.save(self.batch_size)
        self.stations.save()
Beispiel #8
0
    def import_data(self):
        """Run the optional pre-import hook, then import districts and stations.

        ``self.pre_import()`` is treated as optional: a ``NotImplementedError``
        raised by it is ignored. Districts are imported and saved before
        stations.
        """

        # Optional step for pre import tasks
        try:
            self.pre_import()
        except NotImplementedError:
            # no pre-import step was provided; carry on with the import
            pass

        self.stations = StationSet()
        self.districts = DistrictSet()
        self.import_polling_districts()
        self.import_polling_stations()
        self.districts.save()
        self.stations.save()
 def import_data(self):
     """Import polling districts, then polling stations, and save both sets."""
     # start from empty collections on every run
     self.stations = StationSet()
     self.districts = DistrictSet()
     self.import_polling_districts()
     self.import_polling_stations()
     # districts are saved before stations
     self.districts.save()
     self.stations.save()
    def post_import(self):
        """Insert the held-back stations, then repair invalid district polygons.

        For each entry of ``self.missing_stations``, ``record[1]`` is used as
        the station's ``internal_council_id`` and ``record[2]`` is split into
        address and postcode by ``self.split_address()``. The stations are
        added without a location.

        Afterwards an UPDATE is run against the ``PollingDistrict`` table that
        rewrites every row whose ``area`` fails ``ST_IsValid``.
        """
        # iterate self.missing_stations + insert
        # points are missing and we have no postcodes to geocode
        self.stations = StationSet()
        for record in self.missing_stations:
            address_parts = self.split_address(record[2])
            self.add_polling_station({
                'internal_council_id': record[1],
                'postcode'           : address_parts['postcode'],
                'address'            : address_parts['address'],
                'location'           : None,
                'council'            : self.council
            })
        self.stations.save()

        # This data isn't great - the polygons seem to be corrupt in some way.
        # PostGIS can fix them though!
        print("running fixup SQL")

        # _meta is available on the model class; no instance is needed.
        table_name = PollingDistrict._meta.db_table

        # The table name is taken from the model's metadata (not user input),
        # so formatting it into the statement is safe here.
        fixup_sql = """
        UPDATE {0}
         SET area=ST_Multi(ST_CollectionExtract(ST_MakeValid(area), 3))
         WHERE NOT ST_IsValid(area);
        """.format(table_name)

        # Use the cursor as a context manager so it is always closed.
        with connection.cursor() as cursor:
            cursor.execute(fixup_sql)
Beispiel #11
0
    def import_data(self):
        """Run the optional pre-import hook, then import whichever sources have a URL.

        ``self.pre_import()`` is treated as optional: a ``NotImplementedError``
        raised by it is ignored. Districts are imported only when
        ``self.districts_url`` is set, stations only when ``self.stations_url``
        is set; both collections are saved either way.
        """

        # Optional step for pre import tasks
        try:
            self.pre_import()
        except NotImplementedError:
            # no pre-import step was provided; carry on with the import
            pass

        self.districts = DistrictSet()
        self.stations = StationSet()

        # deal with 'stations only' or 'districts only' data
        if self.districts_url is not None:
            self.import_polling_districts()
        if self.stations_url is not None:
            self.import_polling_stations()

        self.districts.save()
        self.stations.save()
Beispiel #12
0
class BaseStationsDistrictsImporter(BaseStationsImporter,
                                    BaseDistrictsImporter):
    """Importer base that combines a stations import with a districts import."""

    def pre_import(self):
        """Optional hook called at the start of ``import_data()``.

        The default raises ``NotImplementedError``, which ``import_data()``
        catches and ignores.
        """
        raise NotImplementedError

    def import_data(self):
        """Run the optional pre-import hook, then import districts and stations.

        Districts are imported and saved before stations.
        """

        # Optional step for pre import tasks
        try:
            self.pre_import()
        except NotImplementedError:
            # the default hook was not overridden; nothing to do
            pass

        self.stations = StationSet()
        self.districts = DistrictSet()
        self.import_polling_districts()
        self.import_polling_stations()
        self.districts.save()
        self.stations.save()
    def import_data(self):
        """Find split districts first, then import districts and stations.

        ``self.find_split_districts()`` runs before anything is imported;
        districts are then imported and saved before stations.
        """
        # override import_data so we can populate
        # self.split_districts as a pre-process
        self.find_split_districts()

        self.stations = StationSet()
        self.districts = DistrictSet()
        self.import_polling_districts()
        self.import_polling_stations()
        self.districts.save()
        self.stations.save()
Beispiel #14
0
class BaseGenericApiImporter(BaseStationsDistrictsImporter):
    """Importer whose districts and/or stations are downloaded from URLs.

    Subclasses set ``districts_url`` and/or ``stations_url``; a source whose
    URL is left as ``None`` is skipped by ``import_data()``.
    """

    # spatial reference ids; 4326 is presumably WGS84 lat/lon (EPSG:4326)
    srid = 4326
    districts_srid = 4326

    districts_name = None
    # URL the districts file is downloaded from; None means "no districts"
    districts_url = None

    stations_name = None
    # URL the stations file is downloaded from; None means "no stations"
    stations_url = None

    local_files = False

    def import_data(self):
        """Run the optional pre-import hook, then import whichever sources have a URL.

        A ``NotImplementedError`` raised by ``self.pre_import()`` is ignored.
        Both collections are saved even if one of them was not imported.
        """

        # Optional step for pre import tasks
        try:
            self.pre_import()
        except NotImplementedError:
            # no pre-import step was provided; carry on with the import
            pass

        self.districts = DistrictSet()
        self.stations = StationSet()

        # deal with 'stations only' or 'districts only' data
        if self.districts_url is not None:
            self.import_polling_districts()
        if self.stations_url is not None:
            self.import_polling_stations()

        self.districts.save()
        self.stations.save()

    def get_districts(self):
        """Download ``districts_url`` to a temporary file and parse it.

        Returns the result of ``self.get_data()`` called with
        ``self.districts_filetype`` and the temporary file's path.
        """
        # NOTE: the temporary file goes away when the with-block exits, so the
        # value returned by get_data() must not depend on the file still existing
        with tempfile.NamedTemporaryFile() as tmp:
            urllib.request.urlretrieve(self.districts_url, tmp.name)
            return self.get_data(self.districts_filetype, tmp.name)

    def get_stations(self):
        """Download ``stations_url`` to a temporary file and parse it.

        Returns the result of ``self.get_data()`` called with
        ``self.stations_filetype`` and the temporary file's path.
        """
        # NOTE: the temporary file goes away when the with-block exits, so the
        # value returned by get_data() must not depend on the file still existing
        with tempfile.NamedTemporaryFile() as tmp:
            urllib.request.urlretrieve(self.stations_url, tmp.name)
            return self.get_data(self.stations_filetype, tmp.name)
    def import_data(self):
        """Import whichever of districts/stations has a URL, then save both sets.

        Districts are imported only when ``self.districts_url`` is set and
        stations only when ``self.stations_url`` is set.
        """
        self.districts = DistrictSet()
        self.stations = StationSet()

        # deal with 'stations only' or 'districts only' data
        if self.districts_url is not None:
            self.import_polling_districts()
        if self.stations_url is not None:
            self.import_polling_stations()

        # both collections are saved, even if one stayed empty
        self.districts.save()
        self.stations.save()
Beispiel #16
0
class BaseStationsAddressesImporter(BaseStationsImporter,
                                    BaseAddressesImporter):
    """Importer base that combines a stations import with an addresses import."""

    # both values are handed to AddressList.save() by import_data()
    fuzzy_match = True
    match_threshold = 100

    def pre_import(self):
        """Optional hook called at the start of ``import_data()``.

        The default raises ``NotImplementedError``, which ``import_data()``
        catches and ignores.
        """
        raise NotImplementedError

    def import_data(self):
        """Run the optional pre-import hook, then import addresses and stations.

        Addresses are saved with ``self.batch_size``, ``self.fuzzy_match`` and
        ``self.match_threshold``; stations are saved afterwards.
        """

        # Optional step for pre import tasks
        try:
            self.pre_import()
        except NotImplementedError:
            # the default hook was not overridden; nothing to do
            pass

        self.stations = StationSet()
        self.addresses = AddressList(self.logger)
        self.import_residential_addresses()
        self.import_polling_stations()
        self.addresses.save(self.batch_size, self.fuzzy_match,
                            self.match_threshold)
        self.stations.save()
class Command(BaseGitHubImporter):
    """Canterbury importer for the parl.2019-12-12 election.

    Station addresses are collected while the districts are read and are
    attached to the station points afterwards, keyed by district code.
    """

    srid = 4326
    districts_srid = 4326
    council_id = "E07000106"
    elections = ["parl.2019-12-12"]
    scraper_name = "wdiv-scrapers/DC-PollingStations-Canterbury"
    geom_type = "geojson"

    # The districts file is where Canterbury publish the station addresses;
    # the stations endpoint carries only the geo data. This maps
    # district code -> station address until a station record claims it.
    station_addresses = {}

    def district_record_to_dict(self, record):
        """Convert a district record and remember its station address.

        Raises ``ValueError`` when a district code is seen again with a
        different station address.
        """
        poly = self.extract_geometry(
            record, self.geom_type, self.get_srid("districts")
        )
        code = record["ID"].strip()
        address = record["POLLING_PL"].strip()

        # Ad-hoc fixes for parl.2019-12-12:
        # the points got updated in the API, but the addresses didn't
        address_overrides = {
            "CWI2": "Thanington Neighbourhood Resource Centre\nThanington Road\nCanterbury\nCT1 3XE",
            "RCS2": "Chartham Sports Club\nBeech Avenue\nChartham\nCanterbury\nCT4 7TA",
        }
        address = address_overrides.get(code, address)

        # An unseen code falls back to the current address, so only a
        # genuinely conflicting earlier entry trips this check.
        if self.station_addresses.get(code, address) != address:
            raise ValueError(
                "District code appears twice with 2 different station addresses"
            )
        self.station_addresses[code] = address

        district_name = " - ".join((record["NAME"].strip(), code))
        return {
            "internal_council_id": code,
            "name": district_name,
            "area": poly,
            "polling_station_id": code,
        }

    def station_record_to_dict(self, record):
        """Convert a station record, pairing its point with the stored address.

        A ``KeyError`` is raised if no address was stored for the record's
        district code.
        """
        code = record["Polling_di"].strip()

        # take the address out of the pool as we use it, so that whatever is
        # left over afterwards is known to have no point
        address = self.station_addresses.pop(code)

        location = self.extract_geometry(
            record, self.geom_type, self.get_srid("stations")
        )
        single_point_multipoint = (
            isinstance(location, MultiPoint) and len(location) == 1
        )
        if single_point_multipoint:
            location = location[0]

        # the supplied point is bang on the building, but it makes the
        # google directions API give us a strange route
        if code == "CWE2" and address.startswith("St Dunstan"):
            location = Point(1.070064, 51.283614, srid=4326)

        return {
            "internal_council_id": code,
            "postcode": "",
            "address": address,
            "location": location,
        }

    def post_import(self):
        """Add a location-less station for every address that was never claimed."""
        # anything still in station_addresses is a district code with a
        # station address attached but no point
        self.stations = StationSet()
        for code, address in self.station_addresses.items():
            unclaimed = {
                "internal_council_id": code,
                "postcode": "",
                "address": address,
                "location": None,
                "council": self.council,
            }
            self.add_polling_station(unclaimed)
        self.stations.save()
class Command(BaseCsvStationsShpDistrictsImporter):
    """
    Imports the Polling Station data from Calderdale

    Districts come from a shapefile and stations from a CSV. Two districts
    (BB and BC) have no row in the stations file, so their records are kept
    aside during the districts import and inserted in post_import().
    """
    council_id       = 'E08000033'
    districts_name   = 'polling_districts'
    stations_name    = 'Polling Stations.csv'
    elections        = [
        'pcc.2016-05-05',
        'ref.2016-06-23'
    ]
    # District records set aside by district_record_to_dict() and consumed by
    # post_import(). NOTE(review): this list lives on the class, so it is
    # shared by every instance.
    missing_stations = []

    def get_station_hash(self, record):
        """Return a key built from the station's address, district, ward and grid position."""
        return "-".join([
            record.address,
            record.polling_district,
            record.ward,
            record.easting,
            record.northing,
        ])

    def import_polling_districts(self):
        """Read the districts shapefile and add every district except the duplicate DC."""
        sf = shapefile.Reader("{0}/{1}".format(
            self.base_folder_path,
            self.districts_name
            ))
        for district in sf.shapeRecords():
            district_info = self.district_record_to_dict(district.record)
            if 'council' not in district_info:
                district_info['council'] = self.council

            geojson = json.dumps(district.shape.__geo_interface__)
            poly = self.clean_poly(GEOSGeometry(geojson, srid=self.get_srid('districts')))

            # File contains 2 districts with the code DC. One of them covers a
            # distinct area not covered by another district. The other exactly
            # contains districts DD and DE. I've assumed that the one covering
            # a distinct area is 'correct' (i.e: A property may not be in 2
            # districts simultaneously). Discard the other district DC.
            #
            # NOTE(review): the unwanted polygon is identified by an exact
            # float comparison on its length.
            if district.record[1] == 'DC' and poly.length == 16675.9905799729:
                continue

            district_info['area'] = poly
            self.add_polling_district(district_info)

    def district_record_to_dict(self, record):
        """
        Convert a district record to a dict (without its geometry).

        Districts BB and BC don't appear in the stations file
        but the station addresses are embedded in the districts
        file. Save them for later.
        """
        if record[1] in ('BB', 'BC'):
            self.missing_stations.append(record)

        return {
            'internal_council_id': record[1],
            'name'               : "%s - %s" % (record[0], record[1]),
            'polling_station_id' : record[1]
        }

    def split_address(self, in_address):
        """Split a comma-separated address into an address block and a postcode.

        Full stops are removed first. The last part is treated as the postcode
        when it is 7 or 8 characters long and is not 'Halifax'; otherwise the
        postcode is returned empty and every part stays in the address.

        Returns a dict with the keys 'address' (parts joined by newlines) and
        'postcode'.
        """
        address_parts = in_address.replace('.', '').split(", ")
        last_part = address_parts[-1]

        if len(last_part) in (7, 8) and last_part != 'Halifax':
            out_address = "\n".join(address_parts[:-1])
            postcode = last_part
        else:
            out_address = "\n".join(address_parts)
            postcode = ''

        return {
            'address'  : out_address,
            'postcode' : postcode
        }

    def station_record_to_dict(self, record):
        """Convert a stations CSV row to a dict, or return None to skip it."""

        # discard the rows with no district id/address
        if not record.polling_district:
            return None

        location = Point(float(record.easting), float(record.northing), srid=self.get_srid())

        address_parts = self.split_address(record.address)

        return {
            'internal_council_id': record.polling_district,
            'postcode'           : address_parts['postcode'],
            'address'            : address_parts['address'],
            'location'           : location
        }

    def post_import(self):
        """Insert the stations set aside earlier, then repair invalid district polygons."""
        # iterate self.missing_stations + insert
        # points are missing and we have no postcodes to geocode
        self.stations = StationSet()
        for record in self.missing_stations:
            address_parts = self.split_address(record[2])
            self.add_polling_station({
                'internal_council_id': record[1],
                'postcode'           : address_parts['postcode'],
                'address'            : address_parts['address'],
                'location'           : None,
                'council'            : self.council
            })
        self.stations.save()

        # This data isn't great - the polygons seem to be corrupt in some way.
        # PostGIS can fix them though!
        print("running fixup SQL")

        # _meta is available on the model class; no instance is needed.
        table_name = PollingDistrict._meta.db_table

        # The table name is taken from the model's metadata (not user input),
        # so formatting it into the statement is safe here.
        fixup_sql = """
        UPDATE {0}
         SET area=ST_Multi(ST_CollectionExtract(ST_MakeValid(area), 3))
         WHERE NOT ST_IsValid(area);
        """.format(table_name)

        # Use the cursor as a context manager so it is always closed.
        with connection.cursor() as cursor:
            cursor.execute(fixup_sql)
class Command(BaseGitHubImporter):
    """Canterbury importer for the local.2019-05-02 election.

    Station addresses are gathered from the district records and matched to
    the station points by district code.
    """

    srid = 4326
    districts_srid = 4326
    council_id = "E07000106"
    elections = ["local.2019-05-02"]
    scraper_name = "wdiv-scrapers/DC-PollingStations-Canterbury"
    geom_type = "geojson"

    # Station addresses are embedded in Canterbury's districts file, while the
    # stations endpoint serves only the geo data. Maps district code -> address
    # for every address not yet matched to a station point.
    station_addresses = {}

    def district_record_to_dict(self, record):
        """Convert a district record and store its station address by code.

        Raises ``ValueError`` if the same code shows up with a different
        station address.
        """
        poly = self.extract_geometry(
            record, self.geom_type, self.get_srid("districts")
        )
        code = record["ID"].strip()
        address = record["POLLING_PL"].strip()

        addresses = self.station_addresses
        # a code we have not seen yet compares equal to itself and passes
        if addresses.get(code, address) != address:
            raise ValueError(
                "District code appears twice with 2 different station addresses"
            )
        addresses[code] = address

        return {
            "internal_council_id": code,
            "name": " - ".join((record["NAME"].strip(), code)),
            "area": poly,
            "polling_station_id": code,
        }

    def station_record_to_dict(self, record):
        """Convert a station record, attaching the address stored for its code.

        A ``KeyError`` is raised when no address is stored for the code.
        """
        code = record["Polling_di"].strip()

        # remove station addresses as we use them
        address = self.station_addresses.pop(code)

        location = self.extract_geometry(
            record, self.geom_type, self.get_srid("stations")
        )
        # unwrap a MultiPoint that holds exactly one point
        if isinstance(location, MultiPoint):
            if len(location) == 1:
                location = location[0]

        return {
            "internal_council_id": code,
            "postcode": "",
            "address": address,
            "location": location,
        }

    def post_import(self):
        """Insert the addresses that no station point claimed, without a location."""
        # mop up district codes with a station address attached but no point
        self.stations = StationSet()
        for code, address in self.station_addresses.items():
            pointless_station = {
                "internal_council_id": code,
                "postcode": "",
                "address": address,
                "location": None,
                "council": self.council,
            }
            self.add_polling_station(pointless_station)
        self.stations.save()
Beispiel #20
0
class Command(BaseGitHubImporter):
    """Canterbury importer for the parl.2017-06-08 election.

    Station addresses are gathered from the district records and matched to
    the station points by district code.
    """

    srid = 4326
    districts_srid = 4326
    council_id = "E07000106"
    elections = ["parl.2017-06-08"]
    scraper_name = "wdiv-scrapers/DC-PollingStations-Canterbury"
    geom_type = "geojson"

    # Station addresses are embedded in Canterbury's districts file, while the
    # stations endpoint serves only the geo data. Maps district code -> address
    # for every address not yet matched to a station point.
    station_addresses = {}

    def district_record_to_dict(self, record):
        """Convert a district record and store its station address by code.

        Raises ``ValueError`` if the same code shows up with a different
        station address.
        """
        poly = self.extract_geometry(
            record, self.geom_type, self.get_srid("districts")
        )
        code = record["ID"].strip()
        address = record["POLLING_PL"].strip()

        # a code we have not seen yet compares equal to itself and passes
        if self.station_addresses.get(code, address) != address:
            raise ValueError(
                "District code appears twice with 2 different station addresses"
            )
        self.station_addresses[code] = address

        district_name = " - ".join((record["NAME"].strip(), code))
        return {
            "internal_council_id": code,
            "name": district_name,
            "area": poly,
            "polling_station_id": code,
        }

    def extract_json_point(self, record, srid):
        """Build a geometry from the JSON held in ``record["geometry"]``.

        The "geometry" member of the parsed JSON is used. A MultiPoint with
        exactly one coordinate is turned into a Point before the result is
        passed through ``GEOSGeometry`` and ``self.clean_poly()``.
        """
        geometry = json.loads(record["geometry"])["geometry"]

        # if geometry object is a MultiPoint with only one Point in it,
        # convert it to a Point
        if geometry["type"] == "MultiPoint" and len(geometry["coordinates"]) == 1:
            geometry["type"] = "Point"
            geometry["coordinates"] = geometry["coordinates"][0]

        return self.clean_poly(GEOSGeometry(json.dumps(geometry), srid=srid))

    def station_record_to_dict(self, record):
        """Convert a station record, attaching the address stored for its code.

        A ``KeyError`` is raised when no address is stored for the code.
        """
        code = record["Polling_di"].strip()

        # remove station addresses as we use them
        address = self.station_addresses.pop(code)

        point = self.extract_json_point(record, self.get_srid("stations"))
        return {
            "internal_council_id": code,
            "postcode": "",
            "address": address,
            "location": point,
        }

    def post_import(self):
        """Insert the addresses that no station point claimed, without a location."""
        # mop up district codes with a station address attached but no point
        self.stations = StationSet()
        for code, address in self.station_addresses.items():
            pointless_station = {
                "internal_council_id": code,
                "postcode": "",
                "address": address,
                "location": None,
                "council": self.council,
            }
            self.add_polling_station(pointless_station)
        self.stations.save()
class Command(BaseMorphApiImporter):
    """Canterbury importer for the local.kent.2017-05-04 election.

    Station addresses are read from the district records and matched to the
    station points by district code.
    """

    srid = 4326
    districts_srid = 4326
    council_id = 'E07000106'
    elections = ['local.kent.2017-05-04']
    scraper_name = 'wdiv-scrapers/DC-PollingStations-Canterbury'
    geom_type = 'geojson'

    # Canterbury embed the station addresses in the districts file
    # The stations endpoint only serves up the geo data
    # (it doesn't include the station addresses)
    # Maps district code -> station address; entries are removed as station
    # records use them.
    station_addresses = {}

    def district_record_to_dict(self, record):
        """Convert a district record and store its station address by code.

        Raises ``ValueError`` if the same code appears again with a different
        station address.
        """
        poly = self.extract_geometry(record, self.geom_type,
                                     self.get_srid('districts'))
        code = record['ID'].strip()
        address = record['POLLING_PL'].strip()

        # the same code with the same address is tolerated; a different
        # address for a known code is an error
        if code in self.station_addresses and self.station_addresses[
                code] != address:
            raise ValueError(
                'District code appears twice with 2 different station addresses'
            )

        self.station_addresses[code] = address

        return {
            'internal_council_id': code,
            'name': record['NAME'].strip() + ' - ' + code,
            'area': poly,
            'polling_station_id': code,
        }

    def extract_json_point(self, record, srid):
        """Build a geometry from the JSON held in ``record['geometry']``.

        The 'geometry' member of the parsed JSON is used; the result is passed
        through ``GEOSGeometry`` and ``self.clean_poly()``.
        """
        geom = json.loads(record['geometry'])

        # if geometry object is a MultiPoint with only one Point in it, convert it to a Point
        if geom['geometry']['type'] == 'MultiPoint' and len(
                geom['geometry']['coordinates']) == 1:
            geom['geometry']['type'] = 'Point'
            geom['geometry']['coordinates'] = geom['geometry']['coordinates'][
                0]

        geojson = json.dumps(geom['geometry'])
        return self.clean_poly(GEOSGeometry(geojson, srid=srid))

    def station_record_to_dict(self, record):
        """Convert a station record, attaching the address stored for its code.

        A ``KeyError`` is raised when no address is stored for the code.
        """
        code = record['Polling_di'].strip()
        address = self.station_addresses[code]
        del (self.station_addresses[code]
             )  # remove station addresses as we use them
        location = self.extract_json_point(record, self.get_srid('stations'))
        return {
            'internal_council_id': code,
            'postcode': '',
            'address': address,
            'location': location,
        }

    def post_import(self):
        """Insert the addresses that no station point claimed, without a location."""
        # mop up any districts where we have a station address
        # attached to a district code but no point
        self.stations = StationSet()
        for code in self.station_addresses:
            self.add_polling_station({
                'internal_council_id': code,
                'postcode': '',
                'address': self.station_addresses[code],
                'location': None,
                'council': self.council
            })
        self.stations.save()
class Command(BaseCsvStationsShpDistrictsImporter):
    """
    Imports the Polling Station data from Calderdale

    Districts come from a shapefile and stations from a CSV. Two districts
    (BB and BC) have no row in the stations file, so their records are kept
    aside during the districts import and inserted in post_import().
    """
    council_id = 'E08000033'
    districts_name = 'polling_districts'
    stations_name = 'Polling Stations.csv'
    elections = ['pcc.2016-05-05', 'ref.2016-06-23']
    # District records set aside by district_record_to_dict() and consumed by
    # post_import(). NOTE(review): this list lives on the class, so it is
    # shared by every instance.
    missing_stations = []

    def get_station_hash(self, record):
        """Return a key built from the station's address, district, ward and grid position."""
        return "-".join([
            record.address,
            record.polling_district,
            record.ward,
            record.easting,
            record.northing,
        ])

    def import_polling_districts(self):
        """Read the districts shapefile and add every district except the duplicate DC."""
        sf = shapefile.Reader("{0}/{1}".format(self.base_folder_path,
                                               self.districts_name))
        for district in sf.shapeRecords():
            district_info = self.district_record_to_dict(district.record)
            if 'council' not in district_info:
                district_info['council'] = self.council

            geojson = json.dumps(district.shape.__geo_interface__)
            poly = self.clean_poly(
                GEOSGeometry(geojson, srid=self.get_srid('districts')))

            # File contains 2 districts with the code DC. One of them covers a
            # distinct area not covered by another district. The other exactly
            # contains districts DD and DE. I've assumed that the one covering
            # a distinct area is 'correct' (i.e: A property may not be in 2
            # districts simultaneously). Discard the other district DC.
            #
            # NOTE(review): the unwanted polygon is identified by an exact
            # float comparison on its length.
            if district.record[1] == 'DC' and poly.length == 16675.9905799729:
                continue

            district_info['area'] = poly
            self.add_polling_district(district_info)

    def district_record_to_dict(self, record):
        """
        Convert a district record to a dict (without its geometry).

        Districts BB and BC don't appear in the stations file
        but the station addresses are embedded in the districts
        file. Save them for later.
        """
        if record[1] in ('BB', 'BC'):
            self.missing_stations.append(record)

        return {
            'internal_council_id': record[1],
            'name': "%s - %s" % (record[0], record[1]),
            'polling_station_id': record[1]
        }

    def split_address(self, in_address):
        """Split a comma-separated address into an address block and a postcode.

        Full stops are removed first. The last part is treated as the postcode
        when it is 7 or 8 characters long and is not 'Halifax'; otherwise the
        postcode is returned empty and every part stays in the address.

        Returns a dict with the keys 'address' (parts joined by newlines) and
        'postcode'.
        """
        address_parts = in_address.replace('.', '').split(", ")
        last_part = address_parts[-1]

        if len(last_part) in (7, 8) and last_part != 'Halifax':
            out_address = "\n".join(address_parts[:-1])
            postcode = last_part
        else:
            out_address = "\n".join(address_parts)
            postcode = ''

        return {'address': out_address, 'postcode': postcode}

    def station_record_to_dict(self, record):
        """Convert a stations CSV row to a dict, or return None to skip it."""

        # discard the rows with no district id/address
        if not record.polling_district:
            return None

        location = Point(float(record.easting),
                         float(record.northing),
                         srid=self.get_srid())

        address_parts = self.split_address(record.address)

        return {
            'internal_council_id': record.polling_district,
            'postcode': address_parts['postcode'],
            'address': address_parts['address'],
            'location': location
        }

    def post_import(self):
        """Insert the stations set aside earlier, then repair invalid district polygons."""
        # iterate self.missing_stations + insert
        # points are missing and we have no postcodes to geocode
        self.stations = StationSet()
        for record in self.missing_stations:
            address_parts = self.split_address(record[2])
            self.add_polling_station({
                'internal_council_id': record[1],
                'postcode': address_parts['postcode'],
                'address': address_parts['address'],
                'location': None,
                'council': self.council
            })
        self.stations.save()

        # This data isn't great - the polygons seem to be corrupt in some way.
        # PostGIS can fix them though!
        print("running fixup SQL")

        # _meta is available on the model class; no instance is needed.
        table_name = PollingDistrict._meta.db_table

        # The table name is taken from the model's metadata (not user input),
        # so formatting it into the statement is safe here.
        fixup_sql = """
        UPDATE {0}
         SET area=ST_Multi(ST_CollectionExtract(ST_MakeValid(area), 3))
         WHERE NOT ST_IsValid(area);
        """.format(table_name)

        # Use the cursor as a context manager so it is always closed.
        with connection.cursor() as cursor:
            cursor.execute(fixup_sql)
class Command(BaseMorphApiImporter):
    """Mid Sussex importer for the local.west-sussex.2017-05-04 election.

    District codes that appear in the stations data with more than one
    distinct UPRN ("split districts") are detected before the import and
    their stations are skipped.
    """

    srid = 4326
    districts_srid  = 4326
    council_id = 'E07000228'
    elections = ['local.west-sussex.2017-05-04']
    scraper_name = 'wdiv-scrapers/DC-PollingStations-Mid-Sussex'
    geom_type = 'geojson'
    # district codes found by find_split_districts(); their stations are
    # dropped by station_record_to_dict()
    split_districts = set()

    def get_station_hash(self, record):
        """Return a key joining the station's district code and UPRN."""
        # handle exact dupes on code/address
        return "-".join([
            record['msercode'],
            record['uprn']
        ])

    def find_split_districts(self):
        """Record every district code that has more than one distinct UPRN.

        We do not want to import these. The stations are read in a single
        pass, so this also works when ``get_stations()`` returns a one-shot
        iterator, and it avoids comparing every station with every other one.
        """
        uprns_by_code = {}
        for station in self.get_stations():
            uprns_by_code.setdefault(station['msercode'], set()).add(station['uprn'])

        # a code is "split" when it maps to 2 or more different UPRNs
        self.split_districts.update(
            code for code, uprns in uprns_by_code.items() if len(uprns) > 1
        )

    def district_record_to_dict(self, record):
        """Convert a district record to a dict, including its extracted geometry."""
        poly = self.extract_geometry(record, self.geom_type, self.get_srid('districts'))
        return {
            'internal_council_id': record['msercode'],
            'name'               : record['boundname'],
            'area'               : poly,
            'polling_station_id' : record['msercode'],
        }

    def station_record_to_dict(self, record):
        """Convert a station record to a dict, or return None for a split district."""

        # handle split districts
        if record['msercode'] in self.split_districts:
            return None

        location = self.extract_geometry(record, self.geom_type, self.get_srid('stations'))
        return {
            'internal_council_id': record['msercode'],
            'postcode':            '',
            'address':             record['address'],
            'location':            location,
        }

    def import_data(self):
        """Find split districts first, then import districts and stations."""
        # override import_data so we can populate
        # self.split_districts as a pre-process
        self.find_split_districts()

        self.stations = StationSet()
        self.districts = DistrictSet()
        self.import_polling_districts()
        self.import_polling_stations()
        self.districts.save()
        self.stations.save()