def test_find_shapefiles(data_dir):
    """
    Check that ``find_shapefiles()`` lists every Shapefile in the
    ``geodata/roads-folders.zip`` fixture, along with all of its parts.

    :param data_dir: fixture object; ``join()`` is used to build the
        path of the test archive
    """

    archive_path = str(data_dir.join('geodata/roads-folders.zip'))
    found = find_shapefiles(archive_path)

    # One Shapefile per folder is expected: roads-00/roads-00, ...
    expected_names = []
    for idx in xrange(4):
        expected_names.append('roads-{0:02d}/roads-{0:02d}'.format(idx))
    assert sorted(found.keys()) == expected_names

    # First four bytes expected at the beginning of each part
    # (0x0000270a is the Shapefile "file code", 9994)
    expected_magic = (
        ('shp', b'\x00\x00\x27\x0a'),
        ('shx', b'\x00\x00\x27\x0a'),
        ('dbf', b'\x03\x5f\x07\x1a'),
        ('prj', b'PROJ'),
    )

    for _name, parts in found.iteritems():
        assert sorted(parts.keys()) == ['dbf', 'prj', 'shp', 'shx']

        for ext, magic in expected_magic:
            assert parts[ext].open().read(4) == magic
def test_find_shapefiles(data_dir):
    """
    Make sure ``find_shapefiles()`` reports the four Shapefiles stored
    in ``geodata/roads-folders.zip``, each one with its four parts.

    :param data_dir: fixture object; its ``join()`` method is used to
        locate the test archive
    """

    zip_path = data_dir.join('geodata/roads-folders.zip')
    found = find_shapefiles(str(zip_path))

    # Names are "<folder>/<base name>", with the same zero-padded index
    name_template = 'roads-{0:02d}/roads-{0:02d}'
    wanted = [name_template.format(num) for num in xrange(4)]
    assert sorted(found.keys()) == wanted

    # Extensions to check, paired with the leading bytes expected
    # from the matching file
    extensions = ['shp', 'shx', 'dbf', 'prj']
    signatures = [
        b'\x00\x00\x27\x0a',
        b'\x00\x00\x27\x0a',
        b'\x03\x5f\x07\x1a',
        b'PROJ',
    ]

    for _key, members in found.iteritems():
        assert sorted(members.keys()) == ['dbf', 'prj', 'shp', 'shx']

        for extension, signature in zip(extensions, signatures):
            header = members[extension].open().read(4)
            assert header == signature
# Example #3
# 0
def import_dataset_find_shapefiles(dataset_id, dataset_conf):
    """
    Import the Shapefiles found in the archives listed as dataset
    resources.

    Each resource is copied to a temporary directory and scanned with
    ``find_shapefiles()``. Every match that has a ``shp`` part is
    extracted and converted to SQL through ``shp2pgsql()``. The table
    ``geodata_<dataset_id>`` is then created using the statement
    generated for the first Shapefile found, and the data of all the
    Shapefiles is appended to it.

    :param dataset_id: The dataset id, used to build the name of the
        destination table
    :param dataset_conf: The dataset configuration. Its ``resources``
        item is iterated; each entry is either a string (used as the
        resource ``url``) or an object passed as-is to
        ``_copy_resource_to_file()``.
    :raises IndexError: if no Shapefile was found in any resource
    """

    # Imported locally, so this function does not depend on names
    # missing from the module-level imports.
    import shutil
    from contextlib import closing

    destination_table = 'geodata_{0}'.format(dataset_id)

    create_table_sqls = []
    import_data_sqls = []

    with TemporaryDir() as tempdir:
        # First, copy zip files to temporary directory

        for resource in dataset_conf['resources']:
            # We assume the file is a zip, but we should double-check that!
            dest_file = os.path.join(tempdir, _random_file_name('zip'))

            if isinstance(resource, basestring):
                resource = {'url': resource}

            # Copy the resource to disk
            _copy_resource_to_file(resource, dest_file)

            # Let's look for shapefiles inside that thing..
            found = find_shapefiles(dest_file)
            for _name, files in found.iteritems():
                if 'shp' not in files:
                    continue  # Bad match..

                # Export shapefiles to temporary files. All the parts
                # share a random base name, so that shp2pgsql can find
                # the companion files next to the .shp one.
                base_name = _random_file_name()
                for ext, item in files.iteritems():
                    dest = os.path.join(tempdir, base_name + '.' + ext)

                    with open(dest, 'wb') as fp:
                        # Stream the member in chunks instead of
                        # loading it whole in memory, and make sure
                        # the source handle gets closed afterwards.
                        with closing(item.open()) as src:
                            shutil.copyfileobj(src, fp)

                shp_full_path = os.path.join(tempdir, base_name + '.shp')

                create_table_sql = shp2pgsql(
                    shp_full_path,
                    table=destination_table,
                    create_table_only=True, mode='create',
                    geometry_column='geom', create_gist_index=True)

                # Use TEXT fields instead of varchar(XX)
                # todo: use a less-hackish way!!
                create_table_sql = re.sub(
                    r'varchar\([0-9]+\)', 'text', create_table_sql,
                    flags=re.IGNORECASE)

                import_data_sql = shp2pgsql(
                    shp_full_path,
                    table=destination_table,
                    mode='append',
                    geometry_column='geom',
                    create_gist_index=False)

                create_table_sqls.append(create_table_sql)
                import_data_sqls.append(import_data_sql)

    # Only the first "create table" statement is executed: all the
    # Shapefiles are appended to the same destination table.
    # NOTE: this raises IndexError if no Shapefile was found.
    with admin_db, admin_db.cursor() as cur:
        cur.execute(create_table_sqls[0])

    with db, db.cursor() as cur:
        for sql in import_data_sqls:
            cur.execute(sql)