Example No. 1
def __init__(self, just_index=False, register_end="statisticalarealevel1"):
    self.url_count = 0
    self.just_index = just_index
    self.register_end = register_end
    self.client = LDAPIClient(
        "http://linked.data.gov.au/dataset/asgs2016",
        url_remapper=LDAPI_CLIENT_REMAPPER
    )
    self.register = self.client.register("http://linked.data.gov.au/dataset/asgs2016/{}/".format(self.register_end))
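LDAPI_CLIENT_REMAPPER is not defined in any of these snippets. Going by the url_remapper dict passed explicitly in Example No. 4, it is presumably a mapping from the public dataset base URLs to a local or mirrored deployment. A minimal sketch under that assumption (the localhost targets are illustrative, not taken from the original code):

# Hypothetical reconstruction of the LDAPI_CLIENT_REMAPPER constant used
# throughout these examples; the shape follows the url_remapper dict shown
# in Example No. 4, and the target URLs are invented for illustration.
LDAPI_CLIENT_REMAPPER = {
    "http://linked.data.gov.au/dataset/asgs2016/": "http://localhost:5000/",
    "http://linked.data.gov.au/dataset/gnaf/": "http://localhost:5001/",
    "http://geofabricld.net/": "http://localhost:5002/",
}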
Example No. 2
def __init__(self, just_index=False, register_end="catchment"):
    self.url_count = 0
    self.just_index = just_index
    self.register_end = register_end
    self.client = LDAPIClient(
        "http://geofabricld.net",
        url_remapper=LDAPI_CLIENT_REMAPPER
    )
    self.register = self.client.register("http://geofabricld.net/{}/".format(self.register_end))
Example No. 3
def __init__(self, just_index=False, register_end="locality"):
    self.url_count = 0
    self.just_index = just_index
    self.register_end = register_end
    self.client = LDAPIClient(
        "http://linked.data.gov.au/dataset/gnaf",
        url_remapper=LDAPI_CLIENT_REMAPPER
    )
    self.register = self.client.register("http://linked.data.gov.au/dataset/gnaf/{}/".format(self.register_end))
Example No. 4
def sync_test_script():
    remapper = {
        "http://test.linked.data.gov.au/dataset/asgs/": "http://localhost:5000/"
    }
    base = "http://test.linked.data.gov.au/dataset/asgs/reg"
    client = LDAPIClient(base, url_remapper=remapper, asynchronous=False, threads=1)
    register = client.register("http://test.linked.data.gov.au/dataset/asgs/sa4/")
    first_page = register.index_page(per_page=50)
    a = register.index()
    instances = register.instances(index=a, min_count=20)
    client.close()
    # import pickle
    # with open("state_index.pickle", 'wb') as f:
    #     pickle.dump(a, f)
    print(len(a))
    print(len(instances))
    return
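sync_test_script pins the client to synchronous mode via asynchronous=False and threads=1. Those kwargs suggest the client also supports a threaded mode; a hedged sketch of the same script with that switched on (asynchronous=True and threads=4 are assumptions inferred from the constructor signature, not verified against LDAPIClient):

def threaded_test_script():
    # Same flow as sync_test_script above; only the client construction
    # changes. The asynchronous/threads values here are assumed, not
    # confirmed against the LDAPIClient implementation.
    remapper = {
        "http://test.linked.data.gov.au/dataset/asgs/": "http://localhost:5000/"
    }
    base = "http://test.linked.data.gov.au/dataset/asgs/reg"
    client = LDAPIClient(base, url_remapper=remapper, asynchronous=True, threads=4)
    register = client.register("http://test.linked.data.gov.au/dataset/asgs/sa4/")
    index = register.index()
    instances = register.instances(index=index, min_count=20)
    client.close()
    print(len(index), len(instances))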
Example No. 5
class GeofabricFeatureExporter(object):

    def write_worksheet_header(self, worksheet):
        """
        :param worksheet:
        :type worksheet: xlsxwriter.writer.Worksheet
        :return:
        """
        i = 0
        for i, h in enumerate(HEADERS):
            worksheet._write_string(0, i, h)
        worksheet.set_column(0, 0, 36)
        worksheet.set_column(1, 1, 36)
        if self.register_end == "catchment":
            worksheet._write_string(0, i+1, "riverRegion")
            worksheet.set_column(2, 2, 36)
            worksheet._write_string(0, i+2, "drainageDivision")
            worksheet.set_column(3, 3, 36)

    def write_csv_header(self, csv_file):
        """
        :param csv_file:
        :type csv_file: IO
        :return:
        """
        headers = list(HEADERS)
        if self.register_end == "catchment":
            headers.append("riverRegion")
            headers.append("drainageDivision")
        for h in headers[0:-1]:
            csv_file.write("{},".format(h))
        csv_file.write("{}\n".format(headers[-1]))

    def write_worksheet_url(self, worksheet, row, col, url, text):
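        # Excel limits a worksheet to roughly 65,530 hyperlinks; past the
        # 65,500 mark, fall back to writing the URL as plain text.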
        if self.url_count > 65500:
            worksheet._write_string(row, col, str(url))
        else:
            worksheet.write_url(row, col, str(url), string=str(text))
            self.url_count += 1

    def write_worksheet_item(self, worksheet, i, identifier, class_uri, has_rr=None, has_dd=None):
        identifier_text = identifier.rsplit('/', 1)[-1]
        row = i+1
        self.write_worksheet_url(worksheet, row, 0, identifier, identifier_text)
        class_text = class_uri.rsplit('/', 1)[-1]
        self.write_worksheet_url(worksheet, row, 1, class_uri, class_text)
        if self.register_end == "catchment":
            if has_rr:
                has_rr_text = has_rr.rsplit('/', 1)[-1]
                self.write_worksheet_url(worksheet, row, 2, has_rr, has_rr_text)
            if has_dd:
                has_dd_text = has_dd.rsplit('/', 1)[-1]
                self.write_worksheet_url(worksheet, row, 3, has_dd, has_dd_text)

    def write_csv_item(self, csv_file, identifier, class_uri, has_rr=None, has_dd=None):
        csv_file.write("{},".format(str(identifier)))
        csv_file.write("{}".format(str(class_uri)))
        if self.register_end == "catchment":
            if has_rr:
                csv_file.write(",{}".format(str(has_rr)))
            else:
                csv_file.write(",")
            if has_dd:
                csv_file.write(",{}\n".format(str(has_dd)))
            else:
                csv_file.write(",\n")
        else:
            csv_file.write("\n")

    def export_csv_index(self, csv_file, index, limit=None):
        for i, (k, v) in enumerate(index.items()):
            try:
                identifier = str(v['@id'])
                class_name = str(v['@type'][0])
            except (KeyError, AttributeError) as e:
                print("Issue with index id: {}".format(str(k)))
                raise e
            self.write_csv_item(csv_file, identifier, class_name)
            if limit is not None and i >= limit:
                break

    def export_xlsx_index(self, worksheet, index, limit=None):
        for i, (k, v) in enumerate(index.items()):
            try:
                identifier = str(v['@id'])
                class_name = str(v['@type'][0])
            except (KeyError, AttributeError) as e:
                print("Issue with index id: {}".format(str(k)))
                raise e
            self.write_worksheet_item(worksheet, i, identifier, class_name)
            if limit is not None and i >= limit:
                break

    def export_xlsx_all(self, worksheet, index, limit=None):
        if isinstance(index, dict):
            index = tuple(index.keys())
        if limit is not None:
            limit = int(limit)
            assert limit > 0
            index = index[0:limit]
        for j, sixteen in enumerate(chunks(index, 16)):
            instances = self.register.instances(index=sixteen)
            for i, (instance_id, resource) in enumerate(instances.items()):
                d = ld_find_subject(resource, instance_id)
                if d is None:
                    continue
                identifier = str(d['@id'])
                class_name = str(sorted(d['@type'])[0])
                try:
                    has_within = d[geo_sfwithin]
                    has_within = [w['@id'] for w in has_within]
                    has_rr = None
                    has_dd = None
                    for w in has_within:
                        if "/riverregion/" in w:
                            has_rr = str(w)
                        elif "/drainagedivision/" in w:
                            has_dd = str(w)
                except (KeyError, IndexError, AttributeError):
                    has_rr = None
                    has_dd = None
                self.write_worksheet_item(worksheet, (j*16)+i, identifier, class_name, has_rr, has_dd)

    def export_csv_all(self, csv_file, index, limit=None):
        if isinstance(index, dict):
            index = tuple(index.keys())
        if limit is not None:
            limit = int(limit)
            assert limit > 0
            index = index[0:limit]
        for j, sixteen in enumerate(chunks(index, 16)):
            instances = self.register.instances(index=sixteen)
            for i, (instance_id, resource) in enumerate(instances.items()):
                d = ld_find_subject(resource, instance_id)
                if d is None:
                    continue
                identifier = str(d['@id'])
                class_name = str(sorted(d['@type'])[0])
                try:
                    has_within = d[geo_sfwithin]
                    has_within = [w['@id'] for w in has_within]
                    has_rr = None
                    has_dd = None
                    for w in has_within:
                        if "/riverregion/" in w:
                            has_rr = str(w)
                        elif "/drainagedivision/" in w:
                            has_dd = str(w)
                except (KeyError, IndexError, AttributeError):
                    has_rr = None
                    has_dd = None
                self.write_csv_item(csv_file, identifier, class_name, has_rr, has_dd)

    def _export_as_xlsx(self, *args, limit=None, **kwargs):
        suffix = self.register_end
        # reset to the first page and increase per_page
        _ = self.register.index_page(page=1, per_page=100)
        pickle_filename = "{}_index_{}.pickle".format(suffix, str(limit))
        try:
            with open(pickle_filename, 'rb') as f:
                index = pickle.load(f)
        except FileNotFoundError:
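            # offset=28000 appears to resume a partially completed crawl;
            # the original code gives no explanation for the value.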
            index = self.register.index(offset=28000, min_count=limit)
            with open(pickle_filename, 'wb') as f:
                pickle.dump(index, f)
        dont_close_workbook = False
        if 'workbook' in kwargs:
            workbook = kwargs.pop('workbook')
            dont_close_workbook = True
        else:
            workbook = xlsxwriter.Workbook('geofabric_{}.xlsx'.format(suffix))
        worksheet = workbook.add_worksheet()
        if 'worksheet_name' in kwargs:
            name = kwargs.pop('worksheet_name')
            worksheet.name = name
        self.write_worksheet_header(worksheet)
        if self.just_index:
            self.export_xlsx_index(worksheet, index, limit=limit)
        else:
            self.export_xlsx_all(worksheet, index, limit=limit)
        if not dont_close_workbook:
            workbook.close()

    def _export_as_csv(self, *args, limit=None, **kwargs):
        suffix = self.register_end
        pickle_filename = "{}_index_{}.pickle".format(suffix, str(limit))
        try:
            with open(pickle_filename, 'rb') as f:
                index = pickle.load(f)
        except FileNotFoundError:
            index = self.register.index(offset=28000, min_count=limit)
            with open(pickle_filename, 'wb') as f:
                pickle.dump(index, f)
        file = open("geofabric_{}.csv".format(suffix), 'w', encoding='utf-8')
        try:
            self.write_csv_header(file)
            if self.just_index:
                self.export_csv_index(file, index, limit=limit)
            else:
                self.export_csv_all(file, index, limit=limit)
        finally:
            file.close()

    def export(self, *args, mode='csv', **kwargs):
        if mode == "excel" or mode == "xlsx":
            return self._export_as_xlsx(*args, **kwargs)
        elif mode == "csv":
            return self._export_as_csv(*args, **kwargs)
        else:
            raise NotImplementedError(
                "No exporter mode \"{}\"".format(mode))

    def __init__(self, just_index=False, register_end="catchment",):
        self.url_count = 0
        self.just_index = just_index
        self.register_end = register_end
        self.client = LDAPIClient(
            "http://geofabricld.net",
            url_remapper=LDAPI_CLIENT_REMAPPER
        )
        self.register = self.client.register("http://geofabricld.net/{}/".format(self.register_end))
Example No. 6
class ASGSFeatureExporter(object):

    def write_worksheet_header(self, worksheet):
        """
        :param worksheet:
        :type worksheet: xlsxwriter.writer.Worksheet
        :return:
        """
        headers = list(HEADERS)
        if self.register_end == "meshblock":
            headers.append("State")
            headers.append("Within SA1")
        elif self.register_end == "statisticalarealevel1":
            headers.append("State")
            headers.append("Within SA2")
        elif self.register_end == "statisticalarealevel2":
            headers.append("State")
            headers.append("Within SA3")
        elif self.register_end == "statisticalarealevel3":
            headers.append("State")
            headers.append("Within SA4")
        elif self.register_end == "statisticalarealevel4":
            headers.append("State")
        for i, h in enumerate(headers):
            worksheet._write_string(0, i, h)
            worksheet.set_column(i, i, 20)

    def write_csv_header(self, csv_file):
        """
        :param csv_file:
        :type csv_file: IO
        :return:
        """
        headers = list(HEADERS)
        if self.register_end == "meshblock":
            headers.append("State")
            headers.append("Within_sa1")
        elif self.register_end == "statisticalarealevel1":
            headers.append("State")
            headers.append("Within_sa2")
        elif self.register_end == "statisticalarealevel2":
            headers.append("State")
            headers.append("Within_sa3")
        elif self.register_end == "statisticalarealevel3":
            headers.append("State")
            headers.append("Within_sa4")
        elif self.register_end == "statisticalarealevel4":
            headers.append("State")
        for h in headers[0:-1]:
            csv_file.write("{},".format(h))
        csv_file.write("{}\n".format(headers[-1]))

    def write_worksheet_url(self, worksheet, row, col, url, text):
        if self.url_count > 65500:
            worksheet._write_string(row, col, str(url))
        else:
            worksheet.write_url(row, col, str(url), string=str(text))
            self.url_count += 1

    def write_worksheet_item(self, worksheet, i, identifier, class_uri, has_state=None, has_within=None):
        identifier_text = identifier.rsplit('/', 1)[-1]
        self.write_worksheet_url(worksheet, i + 1, 0, identifier, identifier_text)
        class_text = class_uri.rsplit('/', 1)[-1]
        self.write_worksheet_url(worksheet, i + 1, 1, class_uri, class_text)
        if has_state:
            has_state_text = has_state.rsplit('/', 1)[-1]
            self.write_worksheet_url(worksheet, i + 1, 2, has_state, has_state_text)
        if has_within:
            has_within_text = has_within.rsplit('/', 1)[-1]
            self.write_worksheet_url(worksheet, i + 1, 3, has_within, has_within_text)

    def write_csv_item(self, csv_file, identifier, class_uri, has_state=None, has_within=None):
        csv_file.write("{},".format(str(identifier)))
        csv_file.write("{},".format(str(class_uri)))
        if has_state:
            csv_file.write("{},".format(str(has_state)))
        else:
            csv_file.write(",")
        if has_within:
            csv_file.write("{}\n".format(str(has_within)))
        else:
            csv_file.write("\n")

    def export_csv_index(self, csv_file, index, limit=None):
        for i, (k, v) in enumerate(index.items()):
            try:
                identifier = str(v['@id'])
                class_name = str(v['@type'][0])
            except (KeyError, AttributeError) as e:
                print("Issue with index id: {}".format(str(k)))
                raise e
            self.write_csv_item(csv_file, identifier, class_name)
            if limit is not None and i >= limit:
                break

    def export_xlsx_index(self, worksheet, index, limit=None):
        for i, (k, v) in enumerate(index.items()):
            try:
                identifier = str(v['@id'])
                class_name = str(v['@type'][0])
            except (KeyError, AttributeError) as e:
                print("Issue with index id: {}".format(str(k)))
                raise e
            self.write_worksheet_item(worksheet, i, identifier, class_name)
            if limit is not None and i >= limit:
                break

    def export_xlsx_all(self, worksheet, index, limit=None):
        if isinstance(index, dict):
            index = tuple(index.keys())
        if limit is not None:
            limit = int(limit)
            assert limit > 0
            index = index[0:limit]
        for j, chunk in enumerate(chunks(index, 16)):
            instances = self.register.instances(index=chunk)
            for i, (instance_id, resource) in enumerate(instances.items()):
                d = ld_find_subject(resource, instance_id)
                if d is None:
                    continue
                identifier = str(d['@id'])
                class_name = str(sorted(d['@type'])[0])
                has_state = None
                has_within = None
                inverses = ld_find_as_object(resource, instance_id)
                for inv in inverses:
                    subj, pred = inv
                    if pred == "http://linked.data.gov.au/def/asgs#isStateOrTerritoryOf":
                        has_state = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#MeshBlock" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel1Of":
                        has_within = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel2Of":
                        has_within = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel2" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel3Of":
                        has_within = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel3" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel4Of":
                        has_within = subj
                self.write_worksheet_item(worksheet, (j*16)+i, identifier, class_name, has_state, has_within)

    def export_csv_all(self, csv_file, index, limit=None):
        if isinstance(index, dict):
            index = tuple(index.keys())
        if limit is not None:
            limit = int(limit)
            assert limit > 0
            index = index[0:limit]
        for j, sixteen in enumerate(chunks(index, 16)):
            instances = self.register.instances(index=sixteen)
            for i, (instance_id, resource) in enumerate(instances.items()):
                d = ld_find_subject(resource, instance_id)
                if d is None:
                    continue
                identifier = str(d['@id'])
                class_name = str(sorted(d['@type'])[0])
                has_state = None
                has_within = None
                inverses = ld_find_as_object(resource, instance_id)
                for inv in inverses:
                    subj, pred = inv
                    if pred == "http://linked.data.gov.au/def/asgs#isStateOrTerritoryOf":
                        has_state = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#MeshBlock" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel1Of":
                        has_within = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel1" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel2Of":
                        has_within = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel2" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel3Of":
                        has_within = subj
                    elif class_name == "http://linked.data.gov.au/def/asgs#StatisticalAreaLevel3" and pred == "http://linked.data.gov.au/def/asgs#isStatisticalAreaLevel4Of":
                        has_within = subj
                self.write_csv_item(csv_file, identifier, class_name, has_state, has_within)

    def _export_as_xlsx(self, *args, limit=None, **kwargs):
        suffix = self.register_end
        register = self.register
        pickle_filename = "{}_index_{}.pickle".format(suffix, limit)
        try:
            with open(pickle_filename, 'rb') as f:
                index = pickle.load(f)
        except FileNotFoundError:
            index = register.index(min_count=limit)
            with open(pickle_filename, 'wb') as f:
                pickle.dump(index, f)
        dont_close_workbook = False
        if 'workbook' in kwargs:
            workbook = kwargs.pop('workbook')
            dont_close_workbook = True
        else:
            workbook = xlsxwriter.Workbook('asgs_{}.xlsx'.format(suffix))
        worksheet = workbook.add_worksheet()
        if 'worksheet_name' in kwargs:
            name = kwargs.pop('worksheet_name')
            worksheet.name = name
        self.write_worksheet_header(worksheet)
        if self.just_index:
            self.export_xlsx_index(worksheet, index, limit=limit)
        else:
            self.export_xlsx_all(worksheet, index, limit=limit)
        if not dont_close_workbook:
            workbook.close()

    def _export_as_csv(self, *args, limit=None, **kwargs):
        suffix = self.register_end
        register = self.register
        pickle_filename = "{}_index_{}.pickle".format(suffix, limit)
        try:
            with open(pickle_filename, 'rb') as f:
                index = pickle.load(f)
        except FileNotFoundError:
            index = register.index(min_count=limit)
            with open(pickle_filename, 'wb') as f:
                pickle.dump(index, f)
        file = open("asgs_{}.csv".format(suffix), 'w', encoding='utf-8')
        try:
            self.write_csv_header(file)
            if self.just_index:
                self.export_csv_index(file, index, limit=limit)
            else:
                self.export_csv_all(file, index, limit=limit)
        finally:
            file.close()

    def export(self, *args, mode="excel", **kwargs):
        if mode == "excel" or mode == "xlsx":
            return self._export_as_xlsx(*args, **kwargs)
        elif mode == "csv":
            return self._export_as_csv(*args, **kwargs)
        else:
            raise NotImplementedError(
                "No exporter mode \"{}\"".format(mode))

    def __init__(self, just_index=False, register_end="statisticalarealevel1"):
        self.url_count = 0
        self.just_index = just_index
        self.register_end = register_end
        self.client = LDAPIClient(
            "http://linked.data.gov.au/dataset/asgs2016",
            url_remapper=LDAPI_CLIENT_REMAPPER
        )
        self.register = self.client.register("http://linked.data.gov.au/dataset/asgs2016/{}/".format(self.register_end))
Example No. 7
class GNAFFeatureExporter(object):

    def write_worksheet_header(self, worksheet):
        """
        :param worksheet:
        :type worksheet: xlsxwriter.writer.Worksheet
        :return:
        """
        headers = list(HEADERS)
        if self.register_end == "address":
            headers.append("type")
            headers.append("text")
            headers.append("state")
            headers.append("locality")
            headers.append("lat")
            headers.append("lng")
        elif self.register_end == "locality":
            headers.append("state")
        for i, h in enumerate(headers):
            worksheet._write_string(0, i, h)
            worksheet.set_column(i, i, 30)

    def write_csv_header(self, csv_file):
        """
        :param csv_file:
        :type csv_file: IO
        :return:
        """
        headers = list(HEADERS)
        if self.register_end == "address":
            headers.append("type")
            headers.append("text")
            headers.append("state")
            headers.append("locality")
            headers.append("lat")
            headers.append("lng")
        elif self.register_end == "locality":
            headers.append("state")
        for h in headers[0:-1]:
            csv_file.write("{},".format(h))
        csv_file.write("{}\n".format(headers[-1]))

    def write_worksheet_url(self, worksheet, row, col, url, text):
        if self.url_count > 65500:
            worksheet._write_string(row, col, str(url))
        else:
            worksheet.write_url(row, col, str(url), string=str(text))
            self.url_count += 1

    def write_worksheet_item(self, worksheet, i, identifier, class_uri, has_type=None, has_text=None, has_state=None, has_locality=None, has_coord=None):
        row = i+1
        identifier_text = identifier.rsplit('/', 1)[-1]
        self.write_worksheet_url(worksheet, row, 0, identifier, identifier_text)
        class_text = class_uri.rsplit('/', 1)[-1]
        self.write_worksheet_url(worksheet, row, 1, class_uri, class_text)
        is_address = self.register_end == "address"
        is_locality = self.register_end == "locality"
        col = 2
        if is_address:
            if has_type:
                has_type_text = has_type.rsplit('/', 1)[-1]
                self.write_worksheet_url(worksheet, row, col, has_type, has_type_text)
            col += 1
            if has_text:
                worksheet._write_string(row, col, str(has_text))
            col += 1
        if is_locality or is_address:
            if has_state:
                has_state_text = has_state.rsplit('/', 1)[-1]
                self.write_worksheet_url(worksheet, row, col, has_state, has_state_text)
            col += 1
        if is_address:
            if has_locality:
                has_loc_text = has_locality.rsplit('/', 1)[-1]
                self.write_worksheet_url(worksheet, row, col, has_locality, has_loc_text)
            col += 1
            if has_coord:
                worksheet._write_string(row, col, str(has_coord[0]))
                worksheet._write_string(row, col+1, str(has_coord[1]))
            col += 2

    def write_csv_item(self, csv_file, identifier, class_uri, has_type=None, has_text=None, has_state=None, has_locality=None, has_coord=None):
        csv_file.write("{},".format(str(identifier)))
        csv_file.write("{}".format(str(class_uri)))
        is_address = self.register_end == "address"
        is_locality = self.register_end == "locality"
        if is_address:
            if has_type:
                csv_file.write(",{}".format(str(has_type)))
            else:
                csv_file.write(",")
            if has_text:
                cleaned_text = str(has_text).replace('"', '""')
                csv_file.write(",\"{}\"".format(cleaned_text))
            else:
                csv_file.write(",")
        if is_locality or is_address:
            if has_state:
                csv_file.write(",{}".format(str(has_state)))
            else:
                csv_file.write(",")
        if is_address:
            if has_locality:
                csv_file.write(",{}".format(str(has_locality)))
            else:
                csv_file.write(",")
            if has_coord:
                csv_file.write(",{},{}\n".format(str(has_coord[0]), str(has_coord[1])))
            else:
                csv_file.write(",,\n")
        else:
            csv_file.write("\n")

    def export_csv_index(self, csv_file, index, limit=None):
        for i, (k, v) in enumerate(index.items()):
            try:
                identifier = str(v['@id'])
                class_name = str(v['@type'][0])
            except (KeyError, AttributeError) as e:
                print("Issue with index id: {}".format(str(k)))
                raise e
            self.write_csv_item(csv_file, identifier, class_name)
            if limit is not None and i >= limit:
                break

    def export_xlsx_index(self, worksheet, index, limit=None):
        for i, (k, v) in enumerate(index.items()):
            try:
                identifier = str(v['@id'])
                class_name = str(v['@type'][0])
            except (KeyError, AttributeError) as e:
                print("Issue with index id: {}".format(str(k)))
                raise e
            self.write_worksheet_item(worksheet, i, identifier, class_name)
            if limit is not None and i >= limit:
                break

    def export_xlsx_all(self, worksheet, index, limit=None):
        if isinstance(index, dict):
            index = tuple(index.keys())
        if limit is not None:
            limit = int(limit)
            assert limit > 0
            index = index[0:limit]
        for j, sixteen in enumerate(chunks(index, 16)):
            instances = self.register.instances(index=sixteen)
            for i, (instance_id, resource) in enumerate(instances.items()):
                d = ld_find_subject(resource, instance_id)
                if d is None:
                    continue
                identifier = str(d['@id'])
                class_name = str(sorted(d['@type'])[0])
                try:
                    has_type = d[gnaf_gnafType]
                    has_type = [w['@id'] for w in has_type]
                    has_type = sorted(has_type)[0]
                except (KeyError, IndexError, AttributeError):
                    has_type = None
                try:
                    has_text = d["http://www.w3.org/2000/01/rdf-schema#comment"]
                    has_text = [w['@value'] for w in has_text]
                    has_text = sorted(has_text)[0]
                except (KeyError, IndexError, AttributeError):
                    has_text = None
                try:
                    has_state = d[gnaf_hasState]
                    has_state = [w['@id'] for w in has_state]
                    has_state = sorted(has_state)[0]
                except (KeyError, IndexError, AttributeError):
                    has_state = None
                try:
                    has_locality = d[gnaf_hasLocality]
                    has_locality = [w['@id'] for w in has_locality]
                    has_locality = sorted(has_locality)[0]
                except (KeyError, IndexError, AttributeError):
                    has_locality = None
                try:
                    has_geometry = d[geos_hasGeometry]
                    has_geometry = [w['@id'] for w in has_geometry]
                    has_geometry = sorted(has_geometry)[0]
                    geo_obj = ld_find_subject(resource, has_geometry)
                    assert geo_obj and isinstance(geo_obj, dict)
                    as_wkt = geo_obj[geos_asWKT]
                    as_wkt = [w['@value'] for w in as_wkt]
                    as_wkt = sorted(as_wkt)[0]
                    found = latlng_wkt_regex.search(as_wkt)
                    assert found
                    lng = found[1]
                    lat = found[2]
                    has_coord = (lat, lng)
                except (KeyError, IndexError, AttributeError, AssertionError):
                    has_coord = None
                self.write_worksheet_item(worksheet, (j*16)+i, identifier, class_name, has_type, has_text, has_state, has_locality, has_coord)

    def export_csv_all(self, csv_file, index, limit=None):
        if isinstance(index, dict):
            index = tuple(index.keys())
        if limit is not None:
            limit = int(limit)
            assert limit > 0
            index = index[0:limit]
        for j, sixteen in enumerate(chunks(index, 16)):
            instances = self.register.instances(index=sixteen)
            for i, (instance_id, resource) in enumerate(instances.items()):
                d = ld_find_subject(resource, instance_id)
                if d is None:
                    continue
                identifier = str(d['@id'])
                class_name = str(sorted(d['@type'])[0])
                try:
                    has_type = d[gnaf_gnafType]
                    has_type = [w['@id'] for w in has_type]
                    has_type = sorted(has_type)[0]
                except (KeyError, IndexError, AttributeError):
                    has_type = None
                try:
                    has_text = d["http://www.w3.org/2000/01/rdf-schema#comment"]
                    has_text = [w['@value'] for w in has_text]
                    has_text = sorted(has_text)[0]
                except (KeyError, IndexError, AttributeError):
                    has_text = None
                try:
                    has_state = d[gnaf_hasState]
                    has_state = [w['@id'] for w in has_state]
                    has_state = sorted(has_state)[0]
                except (KeyError, IndexError, AttributeError):
                    has_state = None
                try:
                    has_locality = d[gnaf_hasLocality]
                    has_locality = [w['@id'] for w in has_locality]
                    has_locality = sorted(has_locality)[0]
                except (KeyError, IndexError, AttributeError):
                    has_locality = None
                try:
                    has_geometry = d[geos_hasGeometry]
                    has_geometry = [w['@id'] for w in has_geometry]
                    has_geometry = sorted(has_geometry)[0]
                    geo_obj = ld_find_subject(resource, has_geometry)
                    assert geo_obj and isinstance(geo_obj, dict)
                    as_wkt = geo_obj[geos_asWKT]
                    as_wkt = [w['@value'] for w in as_wkt]
                    as_wkt = sorted(as_wkt)[0]
                    found = latlng_wkt_regex.search(as_wkt)
                    assert found
                    lng = found[1]
                    lat = found[2]
                    has_coord = (lat, lng)
                except (KeyError, IndexError, AttributeError, AssertionError):
                    has_coord = None
                self.write_csv_item(csv_file, identifier, class_name, has_type, has_text, has_state, has_locality, has_coord)

    def _export_as_xlsx(self, *args, limit=None, **kwargs):
        suffix = self.register_end
        # reset to the first page and increase per_page
        _ = self.register.index_page(page=1, per_page=100)
        pickle_filename = "{}_index_{}.pickle".format(suffix, str(limit))
        try:
            with open(pickle_filename, 'rb') as f:
                index = pickle.load(f)
        except FileNotFoundError:
            index = self.register.index(min_count=limit)
            with open(pickle_filename, 'wb') as f:
                pickle.dump(index, f)
        dont_close_workbook = False
        if 'workbook' in kwargs:
            workbook = kwargs.pop('workbook')
            dont_close_workbook = True
        else:
            workbook = xlsxwriter.Workbook('gnaf_{}.xlsx'.format(suffix))
        worksheet = workbook.add_worksheet()
        if 'worksheet_name' in kwargs:
            name = kwargs.pop('worksheet_name')
            worksheet.name = name
        self.write_worksheet_header(worksheet)
        if self.just_index:
            self.export_xlsx_index(worksheet, index, limit=limit)
        else:
            self.export_xlsx_all(worksheet, index, limit=limit)
        if not dont_close_workbook:
            workbook.close()

    def _export_as_csv(self, *args, limit=None, **kwargs):
        suffix = self.register_end
        # reset to the first page and increase per_page
        _ = self.register.index_page(page=1, per_page=100)
        pickle_filename = "{}_index_{}.pickle".format(suffix, str(limit))
        try:
            with open(pickle_filename, 'rb') as f:
                index = pickle.load(f)
        except FileNotFoundError:
            index = self.register.index(min_count=limit)
            with open(pickle_filename, 'wb') as f:
                pickle.dump(index, f)
        file = open("gnaf_{}.csv".format(suffix), 'w', encoding='utf-8')
        try:
            self.write_csv_header(file)
            if self.just_index:
                self.export_csv_index(file, index, limit=limit)
            else:
                self.export_csv_all(file, index, limit=limit)
        finally:
            file.close()

    def export(self, *args, mode="excel", **kwargs):
        if mode == "excel" or mode == "xlsx":
            return self._export_as_xlsx(*args, **kwargs)
        elif mode == "csv":
            return self._export_as_csv(*args, **kwargs)
        else:
            raise NotImplementedError(
                "No exporter mode \"{}\"".format(mode))

    def __init__(self, just_index=False, register_end="locality"):
        self.url_count = 0
        self.just_index = just_index
        self.register_end = register_end
        self.client = LDAPIClient(
            "http://linked.data.gov.au/dataset/gnaf",
            url_remapper=LDAPI_CLIENT_REMAPPER
        )
        self.register = self.client.register("http://linked.data.gov.au/dataset/gnaf/{}/".format(self.register_end))