def test_2d_srid4326(self):
    # SRID just from meta:
    pt = dict(type='Point', coordinates=[0.0, 1.0], meta=dict(srid=4326))
    expected = 'SRID=4326;' + WKT['point']['2d']
    self.assertEqual(expected, wkt.dumps(pt))

    # SRID from both meta and crs:
    pt = dict(
        type='Point',
        coordinates=[0.0, 1.0],
        meta=dict(srid=4326),
        crs={'type': 'name', 'properties': {'name': 'EPSG4326'}},
    )
    expected = 'SRID=4326;' + WKT['point']['2d']
    self.assertEqual(expected, wkt.dumps(pt))

    # SRID just from crs:
    pt = dict(
        type='Point',
        coordinates=[0.0, 1.0],
        crs={'type': 'name', 'properties': {'name': 'EPSG4326'}},
    )
    expected = 'SRID=4326;' + WKT['point']['2d']
    self.assertEqual(expected, wkt.dumps(pt))

    # Conflicting SRID from meta and crs:
    pt = dict(
        type='Point',
        coordinates=[0.0, 1.0],
        meta=dict(srid=4326),
        crs={'type': 'name', 'properties': {'name': 'EPSG4327'}},
    )
    with self.assertRaises(ValueError) as ar:
        wkt.dumps(pt)
    self.assertEqual('Ambiguous CRS/SRID values: 4326 and 4327',
                     str(ar.exception))
def test_malformed_geojson(self):
    bad_geojson = [
        # GEOMETRYCOLLECTIONs have 'geometries', not coordinates
        dict(type='GeometryCollection', coordinates=[]),
        # All other geometry types must have coordinates
        dict(type='Point'),
        # and a type
        dict(coordinates=[]),
    ]
    for each in bad_geojson:
        with self.assertRaises(geomet.InvalidGeoJSONException):
            wkt.dumps(each)
def test_3d(self):
    # Test for an XYZ/XYM Point:
    pt = dict(type='Point', coordinates=[0.0, -1.0, 2.0])
    expected = (
        'POINT (0.0000000000000000 -1.0000000000000000 2.0000000000000000)'
    )
    self.assertEqual(expected, wkt.dumps(pt))
def test_spatial(self):
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
            {'key': 'spatial_text', 'value': 'Tarragona'},
            {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},
        ]
    }
    extras = self._extras(dataset)

    s = RDFSerializer()
    g = s.g

    dataset_ref = s.graph_from_dataset(dataset)

    spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2]
    assert spatial
    eq_(unicode(spatial), extras['spatial_uri'])
    assert self._triple(g, spatial, RDF.type, DCT.Location)
    assert self._triple(g, spatial, SKOS.prefLabel, extras['spatial_text'])
    eq_(len([t for t in g.triples((spatial, LOCN.geometry, None))]), 2)

    # Geometry in GeoJSON
    assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT)

    # Geometry in WKT
    wkt_geom = wkt.dumps(json.loads(extras['spatial']), decimals=4)
    assert self._triple(g, spatial, LOCN.geometry, wkt_geom, GSP.wktLiteral)
def test_4d(self):
    # Test for an XYZM Point:
    pt = dict(type='Point', coordinates=[-0.0, -1.0, -2.0, -4.0])
    expected = (
        'POINT (-0.0000000000000000 -1.0000000000000000 '
        '-2.0000000000000000 -4.0000000000000000)'
    )
    self.assertEqual(expected, wkt.dumps(pt))
def test_2d_srid4326(self):
    mp = dict(
        type='MultiPoint',
        coordinates=[[100.0, 3.101], [101.0, 2.1], [3.14, 2.18]],
        meta=dict(srid=4326),
    )
    expected = 'SRID=4326;' + WKT['multipoint']['2d']
    self.assertEqual(expected, wkt.dumps(mp, decimals=3))
def test_2d(self):
    mp = dict(type='MultiPoint', coordinates=[
        [100.0, 3.101],
        [101.0, 2.1],
        [3.14, 2.18],
    ])
    expected = (
        'MULTIPOINT ((100.000 3.101), (101.000 2.100), (3.140 2.180))'
    )
    self.assertEqual(expected, wkt.dumps(mp, decimals=3))
def test_srid26618(self):
    gc = {
        'geometries': [
            {'coordinates': [0.0, 1.0], 'type': 'Point'},
            {'coordinates': [[-100.0, 0.0], [-101.0, -1.0]],
             'type': 'LineString'},
            {'coordinates': [[[100.001, 0.001], [101.1235, 0.001],
                              [101.001, 1.001], [100.001, 0.001]],
                             [[100.201, 0.201], [100.801, 0.201],
                              [100.801, 0.801], [100.201, 0.201]]],
             'type': 'Polygon'},
            {'coordinates': [[100.0, 3.101], [101.0, 2.1], [3.14, 2.18]],
             'type': 'MultiPoint'},
            {'coordinates': [[[0.0, -1.0], [-2.0, -3.0], [-4.0, -5.0]],
                             [[1.66, -31023.5, 1.1], [10000.9999, 3.0, 2.2],
                              [100.9, 1.1, 3.3], [0.0, 0.0, 4.4]]],
             'type': 'MultiLineString'},
            {'coordinates': [[[[100.001, 0.001], [101.001, 0.001],
                               [101.001, 1.001], [100.001, 0.001]],
                              [[100.201, 0.201], [100.801, 0.201],
                               [100.801, 0.801], [100.201, 0.201]]],
                             [[[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
                               [9.0, 10.0, 11.0, 12.0],
                               [1.0, 2.0, 3.0, 4.0]]]],
             'type': 'MultiPolygon'},
        ],
        'type': 'GeometryCollection',
        'meta': dict(srid=26618),
    }
    expected = (
        'SRID=26618;GEOMETRYCOLLECTION '
        '(POINT (0.000 1.000),'
        'LINESTRING (-100.000 0.000, -101.000 -1.000),'
        'POLYGON ((100.001 0.001, 101.124 0.001, 101.001 1.001, '
        '100.001 0.001), (100.201 0.201, 100.801 0.201, 100.801 0.801, '
        '100.201 0.201)),'
        'MULTIPOINT ((100.000 3.101), (101.000 2.100), (3.140 2.180)),'
        'MULTILINESTRING ((0.000 -1.000, -2.000 -3.000, -4.000 -5.000), '
        '(1.660 -31023.500 1.100, 10001.000 3.000 2.200, '
        '100.900 1.100 3.300, 0.000 0.000 4.400)),'
        'MULTIPOLYGON (((100.001 0.001, 101.001 0.001, 101.001 1.001, '
        '100.001 0.001), '
        '(100.201 0.201, 100.801 0.201, 100.801 0.801, 100.201 0.201)), '
        '((1.000 2.000 3.000 4.000, 5.000 6.000 7.000 8.000, '
        '9.000 10.000 11.000 12.000, 1.000 2.000 3.000 4.000))))'
    )
    self.assertEqual(expected, wkt.dumps(gc, decimals=3))
def test_4d(self):
    mp = dict(type='MultiPoint', coordinates=[
        [100.0, 3.1, 1, 0],
        [101.0, 2.1, 2, 0],
        [3.14, 2.18, 3, 0],
    ])
    expected = (
        'MULTIPOINT ((100.00 3.10 1.00 0.00), (101.00 2.10 2.00 0.00), '
        '(3.14 2.18 3.00 0.00))'
    )
    self.assertEqual(expected, wkt.dumps(mp, decimals=2))
def test_2d_srid4326(self):
    # Test a typical 2D LineString case:
    ls = dict(
        type='LineString',
        coordinates=[[-100.0, 0.0], [-101.0, -1.0]],
        meta=dict(srid=4326),
    )
    expected = 'SRID=4326;' + WKT['linestring']['2d']
    self.assertEqual(expected, wkt.dumps(ls))
def test_3d(self):
    poly = dict(type='Polygon', coordinates=[
        [[100.0, 0.0, 3.1], [101.0, 0.0, 2.1], [101.0, 1.0, 1.1],
         [100.0, 0.0, 3.1]],
        [[100.2, 0.2, 3.1], [100.8, 0.2, 2.1], [100.8, 0.8, 1.1],
         [100.2, 0.2, 3.1]],
    ])
    expected = WKT['polygon']['3d']
    self.assertEqual(expected, wkt.dumps(poly, decimals=1))
def test_2d(self):
    poly = dict(type='Polygon', coordinates=[
        [[100.001, 0.001], [101.12345, 0.001], [101.001, 1.001],
         [100.001, 0.001]],
        [[100.201, 0.201], [100.801, 0.201], [100.801, 0.801],
         [100.201, 0.201]],
    ])
    expected = WKT['polygon']['2d']
    self.assertEqual(expected, wkt.dumps(poly, decimals=4))
def test_3d(self):
    ls = dict(type='LineString',
              coordinates=[[100.0, 0.0, -60.0], [101.0, 1.0, -65.25]])
    expected = (
        'LINESTRING ('
        '100.0000000000000000 0.0000000000000000 -60.0000000000000000, '
        '101.0000000000000000 1.0000000000000000 -65.2500000000000000)'
    )
    self.assertEqual(expected, wkt.dumps(ls))
def test_2d(self):
    # Test a typical 2D LineString case:
    ls = dict(type='LineString',
              coordinates=[[-100.0, 0.0], [-101.0, -1.0]])
    expected = (
        'LINESTRING (-100.0000000000000000 0.0000000000000000, '
        '-101.0000000000000000 -1.0000000000000000)'
    )
    self.assertEqual(expected, wkt.dumps(ls))
def __init__(self, feature):
    # feature is a GeoJSON feature
    self.coordinates = wkt.dumps(feature["geometry"])
    properties = feature["properties"]
    self.objectid = properties["objectid"]
    self.permit_no = properties["permit_no"]
    self.mobility_impact_text = properties["mobility_impact_text"]
    self.permit_address_text = properties["permit_address_text"]
    self.applicant_name = properties["applicant_name"]
def test_2d(self):
    mlls = dict(type='MultiLineString', coordinates=[
        [[0.0, -1.0], [-2.0, -3.0], [-4.0, -5.0]],
        [[1.66, -31023.5], [10000.9999, 3.0], [100.9, 1.1], [0.0, 0.0]],
    ])
    expected = (
        'MULTILINESTRING ((0.000 -1.000, -2.000 -3.000, -4.000 -5.000), '
        '(1.660 -31023.500, 10001.000 3.000, 100.900 1.100, 0.000 0.000))'
    )
    self.assertEqual(expected, wkt.dumps(mlls, decimals=3))
def test_2d_srid2666(self):
    poly = dict(
        type='Polygon',
        coordinates=[
            [[100.001, 0.001], [101.12345, 0.001], [101.001, 1.001],
             [100.001, 0.001]],
            [[100.201, 0.201], [100.801, 0.201], [100.801, 0.801],
             [100.201, 0.201]],
        ],
        meta=dict(srid=2666),
    )
    expected = 'SRID=2666;' + WKT['polygon']['2d']
    self.assertEqual(expected, wkt.dumps(poly, decimals=4))
def test_3d(self):
    poly = dict(type='Polygon', coordinates=[
        [[100.0, 0.0, 3.1], [101.0, 0.0, 2.1], [101.0, 1.0, 1.1],
         [100.0, 0.0, 3.1]],
        [[100.2, 0.2, 3.1], [100.8, 0.2, 2.1], [100.8, 0.8, 1.1],
         [100.2, 0.2, 3.1]],
    ])
    expected = (
        'POLYGON ((100.0 0.0 3.1, 101.0 0.0 2.1, 101.0 1.0 1.1, '
        '100.0 0.0 3.1), '
        '(100.2 0.2 3.1, 100.8 0.2 2.1, 100.8 0.8 1.1, 100.2 0.2 3.1))'
    )
    self.assertEqual(expected, wkt.dumps(poly, decimals=1))
def test_dumps_empty_geoms(self):
    types = [
        'Point',
        'LineString',
        'Polygon',
        'MultiPoint',
        'MultiLineString',
        'MultiPolygon',
    ]
    expected = ['%s EMPTY' % x.upper() for x in types]
    for i, t in enumerate(types):
        geom = dict(type=t, coordinates=[])
        self.assertEqual(expected[i], wkt.dumps(geom))
def test_2d(self):
    poly = dict(type='Polygon', coordinates=[
        [[100.001, 0.001], [101.12345, 0.001], [101.001, 1.001],
         [100.001, 0.001]],
        [[100.201, 0.201], [100.801, 0.201], [100.801, 0.801],
         [100.201, 0.201]],
    ])
    expected = (
        'POLYGON ((100.0010 0.0010, 101.1235 0.0010, 101.0010 1.0010, '
        '100.0010 0.0010), '
        '(100.2010 0.2010, 100.8010 0.2010, 100.8010 0.8010, '
        '100.2010 0.2010))'
    )
    self.assertEqual(expected, wkt.dumps(poly, decimals=4))
def test_4d(self):
    mlls = dict(type='MultiLineString', coordinates=[
        [[0.0, -1.0, 1.0, 0.0], [-2.0, -3.0, 1.0, 0.0],
         [-4.0, -5.0, 1.0, 0.0]],
        [[1.66, -31023.5, 1.1, 0.0], [10000.9999, 3.0, 2.2, 0.0],
         [100.9, 1.1, 3.3, 0.0], [0.0, 0.0, 4.4, 0.0]],
    ])
    expected = (
        'MULTILINESTRING ((0.00 -1.00 1.00 0.00, '
        '-2.00 -3.00 1.00 0.00, -4.00 -5.00 1.00 0.00), '
        '(1.66 -31023.50 1.10 0.00, 10001.00 3.00 2.20 0.00, '
        '100.90 1.10 3.30 0.00, 0.00 0.00 4.40 0.00))'
    )
    self.assertEqual(expected, wkt.dumps(mlls, decimals=2))
def test(self):
    mpoly = dict(type='MultiPolygon', coordinates=[
        [[[100.001, 0.001], [101.001, 0.001], [101.001, 1.001],
          [100.001, 0.001]],
         [[100.201, 0.201], [100.801, 0.201], [100.801, 0.801],
          [100.201, 0.201]]],
        [[[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
          [9.0, 10.0, 11.0, 12.0], [1.0, 2.0, 3.0, 4.0]]],
    ])
    expected = (
        'MULTIPOLYGON (((100.001 0.001, 101.001 0.001, 101.001 1.001, '
        '100.001 0.001), '
        '(100.201 0.201, 100.801 0.201, 100.801 0.801, '
        '100.201 0.201)), '
        '((1.000 2.000 3.000 4.000, 5.000 6.000 7.000 8.000, '
        '9.000 10.000 11.000 12.000, 1.000 2.000 3.000 4.000)))'
    )
    self.assertEqual(expected, wkt.dumps(mpoly, decimals=3))
def translate(text, output_format='json', indent=None, precision=-1):
    if text.startswith('{'):
        geom = json.loads(text)
    elif text.startswith(('G', 'L', 'M', 'P')):
        geom = wkt.loads(text)
    else:
        geom = wkb.loads(a2b_hex(text))

    if output_format == 'wkb':
        output = b2a_hex(wkb.dumps(geom))
    elif output_format == 'wkt':
        kwds = {}
        if precision >= 0:
            kwds['decimals'] = precision
        output = wkt.dumps(geom, **kwds)
    else:
        if precision >= 0:
            geom = util.round_geom(geom, precision)
        output = json.dumps(geom, indent=indent, sort_keys=True)
    return output
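# A minimal usage sketch for translate() above, illustrative only; it assumes
# json, wkt, wkb, a2b_hex/b2a_hex and util are imported as in the original
# module. The first character of `text` selects the parser: '{' means GeoJSON,
# a leading 'G'/'L'/'M'/'P' means WKT, anything else is treated as hex WKB.
point_geojson = '{"type": "Point", "coordinates": [0.0, 1.0]}'
print(translate(point_geojson, output_format='wkt', precision=3))
# -> POINT (0.000 1.000)
print(translate('POINT (0.000 1.000)', output_format='json'))
# -> {"coordinates": [0.0, 1.0], "type": "Point"}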
def bbox_filter(table, bbox):
    """Table is the actual table object, bbox is of the form
    [lon1, lat1, lon2, lat2].
    """
    # Generate the polygon for the bounding box
    coords = [[bbox[0], bbox[1]], [bbox[0], bbox[3]], [bbox[2], bbox[3]],
              [bbox[2], bbox[1]], [bbox[0], bbox[1]]]
    wkt_polygon = wkt.dumps({'type': 'Polygon', 'coordinates': [coords]},
                            decimals=6)
    print wkt_polygon

    # Query the database and return the result
    query = db.session.query(table)
    filtered = query.filter(table.coordinates.intersects(wkt_polygon))
    resolved = [x for x in filtered]
    print len(resolved)
    return resolved
def geojson_to_wkt(self, gjson_str):
    ## The GeoJSON string should look something like:
    ## u'{"type": "Polygon", "coordinates": [[[-54, 46], [-54, 47], [-52, 47], [-52, 46], [-54, 46]]]}'
    ## Parse the JSON into a geometry dict, then serialise it to
    ## Well-Known Text with wkt.dumps
    try:
        gjson = json.loads(gjson_str)
        try:
            gjson = _add_extra_longitude_points(gjson)
        except Exception:
            # this is bad, but all we're trying to do is improve
            # certain shapes and if that fails showing the original
            # is good enough
            pass
        shape = gjson
    except ValueError:
        return None  # avoid 500 error on bad geojson in DB
    wkt_str = wkt.dumps(shape)
    return wkt_str
def test_nested_gc(self):
    gc = {
        "type": "GeometryCollection",
        "geometries": [
            {
                "type": "GeometryCollection",
                "geometries": [
                    {"type": "Point", "coordinates": [1.0, 2.0]},
                    {"type": "Point", "coordinates": [3.0, 4.0]},
                ],
            },
            {"type": "Point", "coordinates": [5.0, 6.0]},
        ],
    }
    expected = (
        "GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (1 2),POINT (3 4)),"
        "POINT (5 6))"
    )
    self.assertEqual(expected, wkt.dumps(gc, decimals=0))
def test_unsupported_geom_type(self):
    geom = dict(type='Tetrahedron', coordinates=[])
    with self.assertRaises(ValueError) as ar:
        wkt.dumps(geom)
    self.assertEqual("Unsupported geometry type 'Tetrahedron'",
                     str(ar.exception))
def test_2d(self):
    # Test a typical 2D LineString case:
    ls = dict(type='LineString',
              coordinates=[[-100.0, 0.0], [-101.0, -1.0]])
    expected = WKT['linestring']['2d']
    self.assertEqual(expected, wkt.dumps(ls))
def test_2d_3_decimals(self):
    ls = dict(type='LineString', coordinates=[[100.0, 0.0], [101.0, 1.0]])
    expected = 'LINESTRING (100.000 0.000, 101.000 1.000)'
    self.assertEqual(expected, wkt.dumps(ls, decimals=3))
def test_2d(self):
    # Tests a typical 2D Point case:
    pt = dict(type='Point', coordinates=[0.0, 1.0])
    expected = WKT['point']['2d']
    self.assertEqual(expected, wkt.dumps(pt))
def test_4d(self):
    # Test for an XYZM Point:
    pt = dict(type='Point', coordinates=[-0.0, -1.0, -2.0, -4.0])
    expected = WKT['point']['4d']
    self.assertEqual(expected, wkt.dumps(pt))
def geojson_to_wkt(value):
    return wkt.dumps(json.loads(value))
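# Illustrative call for the helper above (assumes `import json` and
# `from geomet import wkt`, as in the other snippets in this collection).
# With geomet's default precision the output carries 16 decimal places:
# geojson_to_wkt('{"type": "Point", "coordinates": [2.5, 48.5]}')
# -> 'POINT (2.5000000000000000 48.5000000000000000)'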
def waze_jams_to_csv(): """Waze jams feed to csv.""" url = 'https://na-georss.waze.com/rtserver/web/TGeoRSS?'\ + 'tk=ccp_partner&ccp_partner_name=SanDiego&format=JSON'\ + '&types=traffic&polygon=-117.338791,32.515842;-116.763725,'\ + '32.565044;-116.967316,32.741082;-116.924744,33.086939;'\ + '-117.000618,33.165720;-117.141037,33.065075;-117.351837,'\ + '32.969500;-117.338791,32.515842;-117.338791,32.515842' datetime_utc = datetime.now(timezone('UTC')) datetime_pst = datetime_utc.astimezone(timezone('US/Pacific')) date_pst = datetime_pst.strftime('%Y-%m-%d') timestamp_pst = datetime_pst.strftime('%Y-%m-%d %H:%M:%S') logfile = conf['prod_data_dir'] + '/{0}_waze_jams_datasd.csv'.format(date_pst) tempfile = conf['temp_data_dir'] + '/waze_temp.csv' rows_csv = [] rows_db = [] cols = ['uuid', 'waze_timestamp', 'street', 'start_node', 'end_node', 'city', 'length', 'delay', 'speed', 'level', 'road_type', 'geom'] fields_dict = { 'uuid': 'uuid', 'length': 'length', 'delay': 'delay', 'speed': 'speed', 'level': 'level', 'road_type': 'roadType', 'city': 'city', 'street': 'street', 'start_node': 'startNode', 'end_node': 'endNode'} r = requests.get(url) body = json.loads(r.content) jams = body['jams'] for jam in jams: for key, value in fields_dict.iteritems(): if value not in jam: exec("{0}='NaN'".format(key)) else: exec("{0}=jam['{1}']".format(key, value)) line = [] coordinates = jam['line'] for i in coordinates: lon = i['x'] lat = i['y'] xy = (lon, lat) line.append(xy) line = LineString(line) line_wkt = wkt.dumps(line, decimals=6) line_db = 'SRID=4326;' + line_wkt row_csv = [uuid, timestamp_pst, street, start_node, end_node, city, length, delay, speed, level, road_type, line_wkt] row_db = [uuid, timestamp_pst, street, start_node, end_node, city, length, delay, speed, level, road_type, line_db] for el in row_db: if isinstance(el, basestring): el.encode('utf-8') rows_csv.append(row_csv) rows_db.append(row_db) logging.info('Saving Waze data to temp csv file.') temp_df = pd.DataFrame(data=rows_db, columns=cols) general.pos_write_csv(temp_df, tempfile, header=False) logging.info('Saving Waze data to daily csv log.') if not os.path.exists(logfile): df = pd.DataFrame(data=rows_csv, columns=cols) general.pos_write_csv(df, logfile) else: log_df = pd.read_csv(logfile) df = pd.DataFrame(data=rows_csv, columns=cols) log_df = log_df.append(df, ignore_index=True) general.pos_write_csv(log_df, logfile) return 'Successfully saved data to csv.'
def seed_db(db): """ Add seed entities to the database. """ with app.app_context(): for x in User.create_defaults(): db.session.add(x) print 'Created user table...' for x in Role.create_defaults(): db.session.add(x) print 'Created role table...' for x in DataSet.create_defaults(): db.session.add(x) print 'Created dataset table...' # for x in Indicator.create_defaults(): # db.session.add(x) for x in dfm.values.tolist(): ind = Indicator() ind.in_name = x[0] ind.unit = x[1] ind.definition = x[2] ind.theme = x[3] ind.sub_theme = x[4] ind.source = x[5] ind.frequency = x[6] db.session.add(ind) print 'Created indicator table...' for x in Region.create_defaults(): db.session.add(x) print 'Created region table...' for x in WaziRegion.create_defaults(): db.session.add(x) print 'Created Wazi-region table...' for x in Type.create_defaults(): db.session.add(x) print 'Created type table...' for x in Theme.create_defaults(): db.session.add(x) print 'Created theme table...' db.session.flush() db.session.commit() print 'Populating datapoints...' mapping = {} mapping_theme = {} for i in range(0, len(dfi)): index = dfi.iloc[i][0] indicator = dfi.iloc[i][2] theme = dfi.iloc[i][3] mapping[index] = indicator mapping_theme[index] = theme for i in range(0, len(df)): dataset_id = df.iloc[i][0] indicator_id = mapping[df.iloc[i][0]] region_id = regions[df.iloc[i][1]] type_id = types[df.iloc[i][2]] theme_id = mapping_theme[df.iloc[i][0]] for y, c in zip(range(1996, 2018), range(3, 25)): if isnan(df.iloc[i][c]): pass else: point = DataPoint() value = df.iloc[i][c] year = y point.dataset_id = int(dataset_id) point.indicator_id = int(indicator_id) point.region_id = int(region_id) point.type_id = int(type_id) point.theme_id = int(theme_id) point.value = float(value) point.year = int(year) db.session.add(point) db.session.flush() db.session.commit() data = geojson.load( open( 'C:/Users/mrade_000/Documents/GitHub/scoda/scoda/data/metro_salc_geo.json' )) data2 = genfromtxt( 'C:/Users/mrade_000/Documents/GitHub/scoda/scoda/data/jhbpopests_clean.csv', delimiter=',') parser = {} for i, I in enumerate(sort(list(set(data2[:, 2])))): parser[int(I)] = {} parser[int(I)]['includes'] = [] parser[int(I)]['city_ref'] = i for i in range(len(data2)): parser[int(data2[i, 2])]['includes'].append(data2[i, 1]) parser2 = {} for i in data2: parser2[int(i[1])] = list(i[21:]) print 'Populating city enumerator GIS data...' for poly in data['features']: if poly['properties']['dc_mdb_c'] == 'JHB': area = Area() for i in parser: if int(poly['properties'] ['sal_code']) in parser[i]['includes']: area.ward_code = i area.city_ward_code = parser[i]['city_ref'] + 1 area.sal_code = int(poly['properties']['sal_code']) area.region_id = 1 area.data = parser2[int(poly['properties']['sal_code'])] polygon = wkt.dumps(poly['geometry'], decimals=6) area.geom = polygon db.session.add(area) else: pass db.session.flush() db.session.commit() del data del data2 print 'Populating city ward GIS data...' 
data3 = geojson.load( open( 'C:/Users/mrade_000/Documents/GitHub/scoda/scoda/data/MunicipalWards2016.json', 'r')) for i in data3['features']: if 'Johannesburg' in i['properties']['MunicName']: ward = Ward() ward.ward_code = int(i['properties']['WardID']) ward.city_ward_code = int(i['properties']['WardNo']) ward.region_id = 1 D = zeros( (len(parser[int(i['properties']['WardID'])]['includes']), 35)) for j, J in enumerate(parser[int( i['properties']['WardID'])]['includes']): D[j, :] = parser2[J] ward.data = list(sum(D, axis=0)) ward.geom = wkt.dumps(i['geometry'], decimals=6) db.session.add(ward) db.session.flush() db.session.commit() del data3
def cartodb2ogr(service_endpoint, aoi, out_fields, where='', _=''):
    global FUNCTION_COUNT
    FUNCTION_COUNT += 1
    logging.info('FUNCTION cartodb2ogr STEP {} START'.format(FUNCTION_COUNT))
    t0 = time()

    endpoint_template = 'https://{}.carto.com/tables/{}/'
    username, table = search(endpoint_template, service_endpoint + '/')
    url = 'https://{username}.carto.com/api/v2/sql'.format(username=username)
    if isinstance(aoi, str):
        aoi = json.loads(aoi)

    params = {}
    fields = ['ST_AsGeoJSON(the_geom) as geometry']
    out_fields = out_fields.split(',')
    for field in out_fields:
        if field:
            fields.append('{field} as {field}'.format(field=field))

    temp = "ST_Intersects(ST_Buffer(ST_GeomFromText('{}',4326),0),the_geom)"
    features = []
    objectids = []
    for f in aoi['features']:
        where_clause = temp.format(
            wkt.dumps({
                'type': 'Polygon',
                'coordinates': bbox(f)
            }))
        if where and not where == '1=1':
            where_clause += ' AND {}'.format(where)
        q = 'SELECT {fields} FROM {table} WHERE {where}'
        params = {
            'q': q.format(fields=','.join(fields), table=table,
                          where=where_clause)
        }
        try:
            req = requests.get(url, params=params)
            req.raise_for_status()
        except Exception as e:
            raise ValueError((e, url, bbox(f)))

        response = json.loads(req.text)['rows']
        features += [{
            'type': 'Feature',
            'geometry': json.loads(h['geometry']),
            'properties': {field: h[field] for field in out_fields if field}
        } for h in response]

    featureset = json2ogr({'type': 'FeatureCollection', 'features': features})
    logging.info('FUNCTION cartodb2ogr STEP {} DONE - {} SECONDS'.format(
        FUNCTION_COUNT, time() - t0))
    return featureset
def tab1_accept(self): """ Load layers tab to load metrices and departments. """ #################################################### ## Load department #################################################### selected_department = self.dlg.comboBox.currentText() if selected_department == '-' and len(self.dlg.selected_items) == 0: QMessageBox.information(self.dlg.centralwidget, "Message", "Nothing selected to load", QMessageBox.Yes) return proj = QgsProject.instance() current_layer = [x.name() for x in proj.mapLayers().values()] if selected_department != '-': level_boundaries = self.database.department_polygon( selected_department) vectorLayer = None if selected_department not in current_layer: vectorLayer = QgsVectorLayer('Polygon?crs=epsg:4326', selected_department, 'memory') # vectorLayer.setCustomProperty("showFeatureCount", len(list(level_boundaries))) proj.addMapLayer(vectorLayer) else: vectorLayerTemp = proj.mapLayersByName(selected_department) if len(vectorLayerTemp) > 0: vectorLayer = vectorLayerTemp[0] vectorLayer.dataProvider().truncate() if vectorLayer: prov = vectorLayer.dataProvider() new_coords = [] fields = QgsFields() for x in ['name', 'level_type']: fields.append(QgsField(x, QVariant.String)) prov.addAttributes(fields) vectorLayer.updateFields() fields = prov.fields() for level_boundary in level_boundaries: try: polygons = [[[ QgsPointXY(point[0], point[1]) for point in polygon ] for polygon in multi_polygon] for multi_polygon in level_boundary['geometry']['coordinates']] geom = QgsGeometry.fromMultiPolygonXY(polygons) outGeom = QgsFeature() outGeom.setFields(fields) outGeom.setAttribute('name', level_boundary['name']) outGeom.setAttribute('level_type', level_boundary['level_type']) outGeom.setGeometry(geom) new_coords.append(outGeom) except Exception as e: print(e) # single_symbol_renderer = vectorLayer.renderer() # symbol = single_symbol_renderer.symbol() # symbol.setOpacity(45) # symbols = QgsSingleSymbolRenderer(symbol) # vectorLayer.triggerRepaint() #Seems to work symbol = QgsFillSymbol.defaultSymbol( vectorLayer.geometryType()) symbol.setOpacity(0.6) myRenderer = QgsSingleSymbolRenderer(symbol) vectorLayer.setRenderer(myRenderer) prov.addFeatures(new_coords) vectorLayer.updateExtents() # print (vectorLayer.featureCount()) ################################################################ # Display selected metrics # ################################################################ if len(self.dlg.selected_items) > 0: progress_bar = ProgressBar(len(self.dlg.selected_items)) for index, item in enumerate(self.dlg.selected_items): vectorLayer = None assets = self.database.service_metrics_geometry( self.check_list[item]) if assets.count() > 0: if item not in current_layer: geometry_type = 'Point' if 'type' in assets[0]['geometry']: geometry_type = assets[0]['geometry']['type'] vectorLayer = QgsVectorLayer(geometry_type + '?crs=epsg:4326', item, 'memory') #32643#4326 proj.addMapLayer(vectorLayer) else: vectorLayerTemp = proj.mapLayersByName(item) if len(vectorLayerTemp) > 0: vectorLayer = vectorLayerTemp[0] vectorLayer.dataProvider().truncate() if vectorLayer: prov = vectorLayer.dataProvider() #fields = prov.fields() #vectorLayer.updateFields() #feat =vectorLayer.getFeatures() #attrs = feat.attributes() #geom = feat.geometry() #coords = geom.asPoint() new_coords = [] fields = QgsFields() for x, y in assets[0]['properties'].items(): fields.append(QgsField(x, QVariant.String)) prov.addAttributes(fields) vectorLayer.updateFields() fields = prov.fields() for asset in assets: try: if 
'type' in asset[ 'geometry'] and 'coordinates' in asset[ 'geometry']: if '_id' in asset['geometry']: del (asset['geometry']['_id']) if 'created_at' in asset['geometry']: del (asset['geometry']['created_at']) if 'updated_at' in asset['geometry']: del (asset['geometry']['updated_at']) geom = QgsGeometry.fromWkt( wkt.dumps(asset['geometry'])) outGeom = QgsFeature() outGeom.setGeometry(geom) outGeom.setFields(fields) for x, y in asset['properties'].items(): outGeom.setAttribute(x, str(y)) new_coords.append(outGeom) except Exception as e: print(e) #outGeom.setAttributes(attrs) try: if len(new_coords) > 0: prov.addFeatures(new_coords) vectorLayer.updateExtents() # feature_layer = FeatureModifier(vectorLayer) except: pass progress_bar.update_progress(index + 1) root = QgsProject.instance().layerTreeRoot() for child in root.children(): if isinstance(child, QgsLayerTreeLayer): child.setCustomProperty("showFeatureCount", True) qgis.utils.iface.zoomToActiveLayer() self.dlg.close()
def graph_from_ckan(self, dataset_dict): dataset_ref = self.dataset_uri(dataset_dict) g = self.g for prefix, namespace in namespaces.iteritems(): g.bind(prefix, namespace) g.add((dataset_ref, RDF.type, DCAT.Dataset)) # Basic fields items = [ ('title', DCT.title, None), ('notes', DCT.description, None), ('url', DCAT.landingPage, None), ('identifier', DCT.identifier, ['guid', 'id']), ('version', OWL.versionInfo, ['dcat_version']), ('alternate_identifier', ADMS.identifier, None), ('version_notes', ADMS.versionNotes, None), ('frequency', DCT.accrualPeriodicity, ['frequency-of-update']), ('name', ADEQUATE.repositoryName, ['id']), ('categorization', ADEQUATE.category, None), ] _add_triples_from_dict(self.g, dataset_dict, dataset_ref, items) # Tags for tag in dataset_dict.get('tags', []): if isinstance(tag, dict): g.add((dataset_ref, DCAT.keyword, Literal(tag['name']))) elif isinstance(tag, basestring): g.add((dataset_ref, DCAT.keyword, Literal(tag))) # Dates items = [ ('issued', DCT.issued, ['metadata_created']), ('modified', DCT.modified, ['metadata_modified']), ] _add_date_triples_from_dict(self.g, dataset_dict, dataset_ref, items) # Lists items = [ ('language', DCT.language, None), ('theme', DCAT.theme, None), ('conforms_to', DCAT.conformsTo, None), ] _add_list_triples_from_dict(self.g, dataset_dict, dataset_ref, items) # Contact details if any([ self._get_dataset_value(dataset_dict, 'contact_uri'), self._get_dataset_value(dataset_dict, 'contact_name'), self._get_dataset_value(dataset_dict, 'contact_email'), self._get_dataset_value(dataset_dict, 'maintainer'), self._get_dataset_value(dataset_dict, 'maintainer_email'), self._get_dataset_value(dataset_dict, 'author'), self._get_dataset_value(dataset_dict, 'author_email'), ]): contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri') if contact_uri: contact_details = URIRef(contact_uri) else: bnode_hash = hashlib.sha1(dataset_ref.n3() + DCAT.contactPoint.n3()) contact_details = BNode(bnode_hash.hexdigest()) g.add((contact_details, RDF.type, VCARD.Organization)) g.add((dataset_ref, DCAT.contactPoint, contact_details)) # contact-email was added as "most frequent extra key" items = [ ('contact_name', VCARD.fn, ['maintainer', 'author']), ('contact_email', VCARD.hasEmail, ['maintainer_email', 'author_email', 'contact-email']), ] _add_triples_from_dict(self.g, dataset_dict, contact_details, items) # Publisher if any([ self._get_dataset_value(dataset_dict, 'publisher_uri'), self._get_dataset_value(dataset_dict, 'publisher_name'), dataset_dict.get('organization'), ]): publisher_name = self._get_dataset_value(dataset_dict, 'publisher_name') if not publisher_name and dataset_dict.get('organization'): publisher_name = dataset_dict['organization']['title'] publisher_uri = self.publisher_uri_from_dataset_dict(dataset_dict) if publisher_uri: publisher_details = URIRef(publisher_uri) else: # No organization nor publisher_uri id_string = dataset_ref.n3() + DCT.publisher.n3() + publisher_name bnode_hash = hashlib.sha1(id_string.encode('utf-8')) publisher_details = BNode(bnode_hash.hexdigest()) g.add((publisher_details, RDF.type, FOAF.Organization)) g.add((dataset_ref, DCT.publisher, publisher_details)) g.add((publisher_details, FOAF.name, Literal(publisher_name))) # TODO: It would make sense to fallback these to organization # fields but they are not in the default schema and the # `organization` object in the dataset_dict does not include # custom fields items = [ ('publisher_email', FOAF.mbox, None), ('publisher_url', FOAF.homepage, None), 
('publisher_type', DCT.type, None), ] _add_triples_from_dict(self.g, dataset_dict, publisher_details, items) # Temporal start = self._get_dataset_value(dataset_dict, 'temporal_start', default='') end = self._get_dataset_value(dataset_dict, 'temporal_end', default='') if start or end: id_string = dataset_ref.n3() + DCT.temporal.n3() + start + end bnode_hash = hashlib.sha1(id_string.encode('utf-8')) temporal_extent = BNode(bnode_hash.hexdigest()) g.add((temporal_extent, RDF.type, DCT.PeriodOfTime)) if start: _add_date_triple(self.g, temporal_extent, SCHEMA.startDate, start) if end: _add_date_triple(self.g, temporal_extent, SCHEMA.endDate, end) g.add((dataset_ref, DCT.temporal, temporal_extent)) # Spatial spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri', default='') spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text', default='') spatial_geom = self._get_dataset_value(dataset_dict, 'spatial', default='') if spatial_uri or spatial_text or spatial_geom: if spatial_uri: spatial_ref = URIRef(spatial_uri) else: id_string = dataset_ref.n3() + DCT.spatial.n3() + spatial_uri + spatial_text + spatial_geom bnode_hash = hashlib.sha1(id_string.encode('utf-8')) spatial_ref = BNode(bnode_hash.hexdigest()) g.add((spatial_ref, RDF.type, DCT.Location)) g.add((dataset_ref, DCT.spatial, spatial_ref)) if spatial_text: g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text))) if spatial_geom: # GeoJSON g.add((spatial_ref, LOCN.geometry, Literal(spatial_geom, datatype=GEOJSON_IMT))) # WKT, because GeoDCAT-AP says so try: g.add((spatial_ref, LOCN.geometry, Literal(wkt.dumps(json.loads(spatial_geom), decimals=4), datatype=GSP.wktLiteral))) except (TypeError, ValueError, InvalidGeoJSONException): pass # License license_id = self._get_dataset_value(dataset_dict, 'license_id', default='') license_url = self._get_dataset_value(dataset_dict, 'license_url', default='') license_title = self._get_dataset_value(dataset_dict, 'license_title', default='') license = None if license_id or license_url or license_title: if license_url and bool(urlparse.urlparse(license_url).netloc): license = URIRef(license_url) else: id_string = dataset_ref.n3() + DCT.license.n3() + license_id + license_url + license_title bnode_hash = hashlib.sha1(id_string.encode('utf-8')) license = BNode(bnode_hash.hexdigest()) # maybe a non-valid url if license_url: g.add((license, RDFS.comment, Literal(license_url))) # l is a license document g.add((license, RDF.type, DCT.LicenseDocument)) if license_title: g.add((license, RDFS.label, Literal(license_title))) if license_id: g.add((license, DCT.identifier, Literal(license_id))) # Resources for resource_dict in dataset_dict.get('resources', []): distribution = URIRef(self.resource_uri(resource_dict, dataset_dict.get('id'))) g.add((dataset_ref, DCAT.distribution, distribution)) g.add((distribution, RDF.type, DCAT.Distribution)) # License if license: g.add((distribution, DCT.license, license)) # Simple values items = [ ('name', DCT.title, None), ('description', DCT.description, None), ('status', ADMS.status, None), ('rights', DCT.rights, None), ('license', DCT.license, None), ] _add_triples_from_dict(self.g, resource_dict, distribution, items) # Format if '/' in resource_dict.get('format', ''): g.add((distribution, DCAT.mediaType, Literal(resource_dict['format']))) else: if resource_dict.get('format'): id_string = dataset_ref.n3() + DCT['format'].n3() + resource_dict['format'] bnode_hash = hashlib.sha1(id_string.encode('utf-8')) f = BNode(bnode_hash.hexdigest()) g.add((f, 
RDF.type, DCT.MediaTypeOrExtent)) g.add((f, RDFS.label, Literal(resource_dict['format']))) g.add((distribution, DCT['format'], f)) if resource_dict.get('mimetype'): g.add((f, RDF.value, Literal(resource_dict['mimetype']))) if resource_dict.get('mimetype'): g.add((distribution, DCAT.mediaType, Literal(resource_dict['mimetype']))) # URL url = resource_dict.get('url') download_url = resource_dict.get('download_url') if download_url: download_url = download_url.strip() if is_valid_uri(download_url): g.add((distribution, DCAT.downloadURL, URIRef(download_url))) else: g.add((distribution, DCAT.downloadURL, Literal(download_url))) if (url and not download_url) or (url and url != download_url): url = url.strip() if is_valid_uri(url): g.add((distribution, DCAT.accessURL, URIRef(url))) else: g.add((distribution, DCAT.accessURL, Literal(url))) # Dates # metadata-date was added as "most frequent extra key" items = [ ('issued', DCT.issued, ['created', 'metadata-date']), ('modified', DCT.modified, ['last_modified']), ] _add_date_triples_from_dict(self.g, resource_dict, distribution, items) # Numbers if resource_dict.get('size'): try: g.add((distribution, DCAT.byteSize, Literal(float(resource_dict['size']), datatype=XSD.decimal))) except (ValueError, TypeError): g.add((distribution, DCAT.byteSize, Literal(resource_dict['size']))) return dataset_ref
def extract_data(self, id): g = self._init_graph(id) # Basic fields items = [ ("title", DCT.title, None, Literal), ("notes", DCT.description, None, Literal), # ('url', DCAT.landingPage, None, URIRef), ("identifier", DCT.identifier, ["guid", "id"], Literal), ("version", OWL.versionInfo, ["version"], Literal), ("version_notes", ADMS.versionNotes, None, Literal), ("frequency", DCT.accrualPeriodicity, None, Literal), ("access_rights", DCT.accessRights, None, Literal), ("dcat_type", DCT.type, None, Literal), ("provenance", DCT.provenance, None, Literal), ] for triple in get_triples_from_dict(self._dataset_dict, self._dataset_ref, items): g.add(triple) g.add((self._dataset_ref, DCAT.landingPage, URIRef(self._dataset_url))) # Tags for tag in self._dataset_dict.get("tags", []): g.add((self._dataset_ref, DCAT.keyword, Literal(tag["name"]))) # Dates items = [ ("issued", DCT.issued, ["metadata_created"], Literal), ("modified", DCT.modified, ["metadata_modified"], Literal), ] for triple in get_triples_from_dict(self._dataset_dict, self._dataset_ref, items, date_value=True): g.add(triple) # Lists items = [ ("language", DCT.language, None, Literal), ("theme", DCAT.theme, None, URIRef), ("conforms_to", DCT.conformsTo, None, Literal), ("alternate_identifier", ADMS.identifier, None, Literal), ("documentation", FOAF.page, None, Literal), # TODO: why we dont have this field? # ('related_resource', DCT.relation, None, Literal), ("has_version", DCT.hasVersion, None, Literal), ("is_version_of", DCT.isVersionOf, None, Literal), ("source", DCT.source, None, Literal), ("sample", ADMS.sample, None, Literal), ] for triple in get_list_triples_from_dict(self._dataset_dict, self._dataset_ref, items): g.add(triple) # Contact details if any( self._dataset_dict.get(field) for field in [ "contact_uri", "contact_name", "contact_email", "maintainer", "maintainer_email", "author", "author_email", ]): contact_uri = self._dataset_dict.get("contact_uri") if contact_uri: contact_details = CleanedURIRef(contact_uri) else: contact_details = BNode() g.add((contact_details, RDF.type, VCARD.Organization)) g.add((self._dataset_ref, DCAT.contactPoint, contact_details)) for triple in get_triple_from_dict( self._dataset_dict, contact_details, VCARD.fn, "contact_name", ["maintainer", "author"], ): g.add(triple) # Add mail address as URIRef, and ensure it has a mailto: prefix for triple in get_triple_from_dict( self._dataset_dict, contact_details, VCARD.hasEmail, "contact_email", ["maintainer_email", "author_email"], _type=URIRef, value_modifier=add_mailto, ): g.add(triple) # Publisher if any( self._dataset_dict.get(field) for field in [ "publisher_uri", "publisher_name", "organization", ]): publisher_uri = publisher_uri_from_dataset_dict(self._dataset_dict) if publisher_uri: publisher_details = CleanedURIRef(publisher_uri) else: # No organization nor publisher_uri publisher_details = BNode() g.add((publisher_details, RDF.type, FOAF.Organization)) g.add((self._dataset_ref, DCT.publisher, publisher_details)) publisher_name = self._dataset_dict.get("publisher_name") if not publisher_name and self._dataset_dict.get("organization"): publisher_name = self._dataset_dict["organization"]["title"] g.add((publisher_details, FOAF.name, Literal(publisher_name))) # TODO: It would make sense to fallback these to organization # fields but they are not in the default schema and the # `organization` object in the self._dataset_dict does not include # custom fields items = [ ("publisher_email", FOAF.mbox, None, Literal), ("publisher_url", FOAF.homepage, 
None, URIRef), ("publisher_type", DCT.type, None, Literal), ] for triple in get_triples_from_dict(self._dataset_dict, publisher_details, items): g.add(triple) # Temporal start = self._dataset_dict.get("temporal_start") end = self._dataset_dict.get("temporal_end") if start or end: temporal_extent = BNode() g.add((temporal_extent, RDF.type, DCT.PeriodOfTime)) if start: g.add(get_date_triple(temporal_extent, SCHEMA.startDate, start)) if end: g.add(get_date_triple(temporal_extent, SCHEMA.endDate, end)) g.add((self._dataset_ref, DCT.temporal, temporal_extent)) # Spatial spatial_uri = self._dataset_dict.get("spatial_uri") spatial_text = self._dataset_dict.get("spatial_text") spatial_geom = self._dataset_dict.get("spatial") if spatial_uri or spatial_text or spatial_geom: if spatial_uri: spatial_ref = CleanedURIRef(spatial_uri) else: spatial_ref = BNode() g.add((spatial_ref, RDF.type, DCT.Location)) g.add((self._dataset_ref, DCT.spatial, spatial_ref)) if spatial_text: g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text))) if spatial_geom: # GeoJSON g.add(( spatial_ref, LOCN.geometry, Literal(spatial_geom, datatype=GEOJSON_IMT), )) # WKT, because GeoDCAT-AP says so try: g.add(( spatial_ref, LOCN.geometry, Literal( wkt.dumps(json.loads(spatial_geom), decimals=4), datatype=GSP.wktLiteral, ), )) except (TypeError, ValueError, InvalidGeoJSONException): pass # Resources for resource_dict in self._dataset_dict.get("resources", []): distribution = CleanedURIRef(resource_uri(resource_dict)) g.add((self._dataset_ref, DCAT.distribution, distribution)) g.add((distribution, RDF.type, DCAT.Distribution)) # Simple values items = [ ("name", DCT.title, None, Literal), ("description", DCT.description, None, Literal), ("status", ADMS.status, None, Literal), ("rights", DCT.rights, None, Literal), # TODO: we are avoiding licenses right now # ('license', DCT.license, None, Literal), ("access_url", DCAT.accessURL, None, URIRef), ("download_url", DCAT.downloadURL, None, URIRef), ] for triple in get_triples_from_dict(resource_dict, distribution, items): g.add(triple) # Lists items = [ ("documentation", FOAF.page, None, Literal), ("language", DCT.language, None, Literal), ("conforms_to", DCT.conformsTo, None, Literal), ] for triple in get_list_triples_from_dict(resource_dict, distribution, items): g.add(triple) # Format if "/" in resource_dict.get("format", ""): g.add(( distribution, DCAT.mediaType, Literal(resource_dict["format"]), )) else: if resource_dict.get("format"): g.add(( distribution, DCT["format"], Literal(resource_dict["format"]), )) if resource_dict.get("mimetype"): g.add(( distribution, DCAT.mediaType, Literal(resource_dict["mimetype"]), )) # URL fallback and old behavior url = resource_dict.get("url") download_url = resource_dict.get("download_url") access_url = resource_dict.get("access_url") # Use url as fallback for access_url if access_url is not # set and download_url is not equal if url and not access_url: if (not download_url) or (download_url and url != download_url): for triple in get_triple_from_dict( resource_dict, distribution, DCAT.accessURL, "url", _type=URIRef, ): g.add(triple) # Dates items = [ ("issued", DCT.issued, None, Literal), ("modified", DCT.modified, None, Literal), ] for triple in get_triples_from_dict(resource_dict, distribution, items, date_value=True): g.add(triple) # Numbers if resource_dict.get("size"): try: g.add(( distribution, DCAT.byteSize, Literal( float(resource_dict["size"]), datatype=XSD.decimal, ), )) except (ValueError, TypeError): g.add(( distribution, 
DCAT.byteSize, Literal(resource_dict["size"]), )) # Checksum if resource_dict.get("hash"): checksum = BNode() g.add((checksum, RDF.type, SPDX.Checksum)) g.add(( checksum, SPDX.checksumValue, Literal(resource_dict["hash"], datatype=XSD.hexBinary), )) if resource_dict.get("hash_algorithm"): if resource_dict["hash_algorithm"].startswith("http"): g.add(( checksum, SPDX.algorithm, CleanedURIRef(resource_dict["hash_algorithm"]), )) else: g.add(( checksum, SPDX.algorithm, Literal(resource_dict["hash_algorithm"]), )) g.add((distribution, SPDX.checksum, checksum)) # graph.add((dataset. RDF.about , Literal())) return {"data": g.serialize(format="pretty-xml")}
def test_4d(self):
    mp = dict(type='MultiPoint', coordinates=[
        [100.0, 3.1, 1, 0],
        [101.0, 2.1, 2, 0],
        [3.14, 2.18, 3, 0],
    ])
    expected = WKT['multipoint']['4d']
    self.assertEqual(expected, wkt.dumps(mp, decimals=2))
def test_2d(self):
    mp = dict(type='MultiPoint', coordinates=[
        [100.0, 3.101],
        [101.0, 2.1],
        [3.14, 2.18],
    ])
    expected = WKT['multipoint']['2d']
    self.assertEqual(expected, wkt.dumps(mp, decimals=3))
def graph_from_dataset(self, dataset_dict, dataset_ref): g = self.g for prefix, namespace in namespaces.iteritems(): g.bind(prefix, namespace) g.add((dataset_ref, RDF.type, DCAT.Dataset)) # Basic fields items = [ ('title', DCT.title, None, Literal), ('notes', DCT.description, None, Literal), ('url', DCAT.landingPage, None, URIRef), ('identifier', DCT.identifier, ['guid', 'id'], Literal), ('version', OWL.versionInfo, ['dcat_version'], Literal), ('version_notes', ADMS.versionNotes, None, Literal), ('frequency', DCT.accrualPeriodicity, None, Literal), ('access_rights', DCT.accessRights, None, Literal), ] self._add_triples_from_dict(dataset_dict, dataset_ref, items) # Tags for tag in dataset_dict.get('tags', []): g.add((dataset_ref, DCAT.keyword, Literal(tag['name']))) # Dates items = [ ('issued', DCT.issued, ['metadata_created'], Literal), ('modified', DCT.modified, ['metadata_modified'], Literal), ] self._add_date_triples_from_dict(dataset_dict, dataset_ref, items) # Lists items = [ ('language', DCT.language, None, Literal), ('theme', DCAT.theme, None, URIRef), ('conforms_to', DCT.conformsTo, None, Literal), ('alternate_identifier', ADMS.identifier, None, Literal), ('documentation', FOAF.page, None, Literal), ('related_resource', DCT.relation, None, Literal), ('has_version', DCT.hasVersion, None, Literal), ('is_version_of', DCT.isVersionOf, None, Literal), ('source', DCT.source, None, Literal), ('sample', ADMS.sample, None, Literal), ] self._add_list_triples_from_dict(dataset_dict, dataset_ref, items) # Contact details if any([ self._get_dataset_value(dataset_dict, 'contact_uri'), self._get_dataset_value(dataset_dict, 'contact_name'), self._get_dataset_value(dataset_dict, 'contact_email'), self._get_dataset_value(dataset_dict, 'maintainer'), self._get_dataset_value(dataset_dict, 'maintainer_email'), self._get_dataset_value(dataset_dict, 'author'), self._get_dataset_value(dataset_dict, 'author_email'), ]): contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri') if contact_uri: contact_details = URIRef(contact_uri) else: contact_details = BNode() g.add((contact_details, RDF.type, VCARD.Organization)) g.add((dataset_ref, DCAT.contactPoint, contact_details)) items = [ ('contact_name', VCARD.fn, ['maintainer', 'author'], Literal), ('contact_email', VCARD.hasEmail, ['maintainer_email', 'author_email'], Literal), ] self._add_triples_from_dict(dataset_dict, contact_details, items) # Publisher if any([ self._get_dataset_value(dataset_dict, 'publisher_uri'), self._get_dataset_value(dataset_dict, 'publisher_name'), dataset_dict.get('organization'), ]): publisher_uri = publisher_uri_from_dataset_dict(dataset_dict) if publisher_uri: publisher_details = URIRef(publisher_uri) else: # No organization nor publisher_uri publisher_details = BNode() g.add((publisher_details, RDF.type, FOAF.Organization)) g.add((dataset_ref, DCT.publisher, publisher_details)) publisher_name = self._get_dataset_value(dataset_dict, 'publisher_name') if not publisher_name and dataset_dict.get('organization'): publisher_name = dataset_dict['organization']['title'] g.add((publisher_details, FOAF.name, Literal(publisher_name))) # TODO: It would make sense to fallback these to organization # fields but they are not in the default schema and the # `organization` object in the dataset_dict does not include # custom fields items = [ ('publisher_email', FOAF.mbox, None, Literal), ('publisher_url', FOAF.homepage, None, URIRef), ('publisher_type', DCT.type, None, Literal), ] self._add_triples_from_dict(dataset_dict, 
publisher_details, items) # Temporal start = self._get_dataset_value(dataset_dict, 'temporal_start') end = self._get_dataset_value(dataset_dict, 'temporal_end') if start or end: temporal_extent = BNode() g.add((temporal_extent, RDF.type, DCT.PeriodOfTime)) if start: self._add_date_triple(temporal_extent, SCHEMA.startDate, start) if end: self._add_date_triple(temporal_extent, SCHEMA.endDate, end) g.add((dataset_ref, DCT.temporal, temporal_extent)) # Spatial spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri') spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text') spatial_geom = self._get_dataset_value(dataset_dict, 'spatial') if spatial_uri or spatial_text or spatial_geom: if spatial_uri: spatial_ref = URIRef(spatial_uri) else: spatial_ref = BNode() g.add((spatial_ref, RDF.type, DCT.Location)) g.add((dataset_ref, DCT.spatial, spatial_ref)) if spatial_text: g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text))) if spatial_geom: # GeoJSON g.add((spatial_ref, LOCN.geometry, Literal(spatial_geom, datatype=GEOJSON_IMT))) # WKT, because GeoDCAT-AP says so try: g.add((spatial_ref, LOCN.geometry, Literal(wkt.dumps(json.loads(spatial_geom), decimals=4), datatype=GSP.wktLiteral))) except (TypeError, ValueError, InvalidGeoJSONException): pass # Resources for resource_dict in dataset_dict.get('resources', []): distribution = URIRef(resource_uri(resource_dict)) g.add((dataset_ref, DCAT.distribution, distribution)) g.add((distribution, RDF.type, DCAT.Distribution)) # Simple values items = [ ('name', DCT.title, None, Literal), ('description', DCT.description, None, Literal), ('status', ADMS.status, None, Literal), ('rights', DCT.rights, None, Literal), ('license', DCT.license, None, Literal), ] self._add_triples_from_dict(resource_dict, distribution, items) # Lists items = [ ('documentation', FOAF.page, None, Literal), ('language', DCT.language, None, Literal), ('conforms_to', DCT.conformsTo, None, Literal), ] self._add_list_triples_from_dict(resource_dict, distribution, items) # Format if '/' in resource_dict.get('format', ''): g.add((distribution, DCAT.mediaType, Literal(resource_dict['format']))) else: if resource_dict.get('format'): g.add((distribution, DCT['format'], Literal(resource_dict['format']))) if resource_dict.get('mimetype'): g.add((distribution, DCAT.mediaType, Literal(resource_dict['mimetype']))) # URL url = resource_dict.get('url') download_url = resource_dict.get('download_url') if download_url: g.add((distribution, DCAT.downloadURL, URIRef(download_url))) if (url and not download_url) or (url and url != download_url): g.add((distribution, DCAT.accessURL, URIRef(url))) # Dates items = [ ('issued', DCT.issued, None, Literal), ('modified', DCT.modified, None, Literal), ] self._add_date_triples_from_dict(resource_dict, distribution, items) # Numbers if resource_dict.get('size'): try: g.add((distribution, DCAT.byteSize, Literal(float(resource_dict['size']), datatype=XSD.decimal))) except (ValueError, TypeError): g.add((distribution, DCAT.byteSize, Literal(resource_dict['size']))) # Checksum if resource_dict.get('hash'): checksum = BNode() g.add((checksum, SPDX.checksumValue, Literal(resource_dict['hash'], datatype=XSD.hexBinary))) if resource_dict.get('hash_algorithm'): if resource_dict['hash_algorithm'].startswith('http'): g.add((checksum, SPDX.algorithm, URIRef(resource_dict['hash_algorithm']))) else: g.add((checksum, SPDX.algorithm, Literal(resource_dict['hash_algorithm']))) g.add((distribution, SPDX.checksum, checksum))
import sys
import json
from osgeo import gdal, ogr

with open(r"F:/Master Course Materials/Second Semester/Recommender-Implementation/harvested-datasets/united-kingdom/5fb8813978cc4e4892da4b57bcf4491f_0.geojson", "r") as myfile:
    data = myfile.read()

dataset = json.loads(data)
for f in dataset["features"]:
    for coordinate in f["geometry"]["coordinates"]:
        c1 = [x[0] for x in coordinate]  # first coordinate
        c2 = [x[1] for x in coordinate]
        bbox = [[min(c1), min(c2)], [min(c1), max(c2)], [max(c1), max(c2)],
                [max(c1), min(c2)], [min(c1), min(c2)]]
        print(bbox)


import sys
import json
from osgeo import gdal, ogr
from geomet import wkt

with open(r"F:/Master Course Materials/Second Semester/Recommender-Implementation/harvested-datasets/united-kingdom/5fb8813978cc4e4892da4b57bcf4491f_0.geojson", "r") as myfile:
    data = myfile.read()

dataset = json.loads(data)
envelopes = []
for f in dataset["features"]:
    geom = ogr.CreateGeometryFromWkt(wkt.dumps(f["geometry"], decimals=4))
    env = geom.GetEnvelope()
    envelopes.append(env)
print(envelopes)

# GetEnvelope returns a tuple (minX, maxX, minY, maxY)
env = geom.GetEnvelope()
print(env[0], env[2], env[1], env[3])
def create_line(feature: Dict[str, Any]) -> Iterable:
    return chain([feature['id']],
                 (str(value) for value in feature['properties'].values()),
                 [wkt.dumps(feature['geometry'])])
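# Illustrative only: create_line() chains the feature id, the stringified
# property values, and the WKT geometry into one flat row. The feature dict
# below is made up, and the snippet assumes `from itertools import chain`,
# `from typing import Any, Dict, Iterable` and `from geomet import wkt`.
feature = {
    'id': 'f-1',
    'properties': {'name': 'depot', 'capacity': 12},
    'geometry': {'type': 'Point', 'coordinates': [10.0, 20.0]},
}
row = list(create_line(feature))
# -> ['f-1', 'depot', '12', 'POINT (10.0000000000000000 20.0000000000000000)']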
def wkt(self):
    return wkt.dumps(self.spatial)
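# Hypothetical usage of the wkt() accessor above: if `self.spatial` holds a
# parsed GeoJSON dict, the accessor simply serialises it, e.g.
#   obj.spatial = {'type': 'Point', 'coordinates': [1.0, 2.0]}
#   obj.wkt()  ->  'POINT (1.0000000000000000 2.0000000000000000)'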
def seed_db(db): """ Add ETH seed entities to the database. """ with app.app_context(): # data = geojson.load(open('C:/Users/Wasim/Documents/GitHub/scoda/scoda/data/DBN2016_SHP.json')) data = geojson.load(open('%s/data/%s' % (app.root_path, "DBN2016_SHP.json"))) data2 = genfromtxt('%s/data/%s' % (app.root_path, "exporteth16.csv"), delimiter=',', skip_header=1) parser = {} Ws = [52102003, 52102015, 52102016, 52102018, 52201006, 52201008, 52206002, 52206003, 52206004, 52206005, 52206006, 52206007, 52902021, 52903012, 52903013, 52903014, 52903016, 52903017, 52903018] for i, I in enumerate(list(range(59500001, 59500111)) + Ws): parser[int(I)] = {} parser[int(I)]['includes'] = [] parser[int(I)]['city_ref'] = i for i in range(len(data2)): parser[int(data2[i, 2])]['includes'].append(data2[i, 1]) parser2 = {} for i in data2: parser2[int(i[1])] = list(i[20:]) print('Populating ETH enumerator GIS data...') for poly in data['features']: if poly['properties']['dc_mdb_c'] == 'ETH': area = Area() for i in parser: if int(poly['properties']['sal_code']) in parser[i]['includes']: area.ward_code = i area.city_ward_code = parser[i]['city_ref'] + 1 area.sal_code = int(poly['properties']['sal_code']) area.region_id = 4 area.data = parser2[int(poly['properties']['sal_code'])] polygon = wkt.dumps(poly['geometry'], decimals=6) area.geom = polygon db.session.add(area) else: pass db.session.flush() db.session.commit() del data del data2 print ('Populating ETH ward GIS data...') data3 = geojson.load(open('%s/data/%s' % (app.root_path, "MunicipalWards2016.json"), 'r')) for i in data3['features']: if 'eThekwini' in i['properties']['MunicName']: ward = Ward() ward.ward_code = int(i['properties']['WardID']) ward.city_ward_code = int(i['properties']['WardNo']) ward.region_id = 4 D = zeros((len(parser[int(i['properties']['WardID'])]['includes']), 35)) for j, J in enumerate(parser[int(i['properties']['WardID'])]['includes']): D[j, :] = parser2[J] ward.data = list(sum(D, axis=0)) ward.geom = wkt.dumps(i['geometry'], decimals=6) db.session.add(ward) db.session.flush() db.session.commit()
def graph_from_dataset(self, dataset_dict, dataset_ref):

    g = self.g

    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    # Basic fields
    items = [
        ('title', DCT.title, None, Literal),
        ('notes', DCT.description, None, Literal),
        ('url', DCAT.landingPage, None, URIRef),
        ('identifier', DCT.identifier, ['guid', 'id'], Literal),
        ('version', OWL.versionInfo, ['dcat_version'], Literal),
        ('version_notes', ADMS.versionNotes, None, Literal),
        ('frequency', DCT.accrualPeriodicity, None, URIRef),
        ('access_rights', DCT.accessRights, None, Literal),
        ('dcat_type', DCT.type, None, Literal),
        ('provenance', DCT.provenance, None, Literal),
    ]
    self._add_triples_from_dict(dataset_dict, dataset_ref, items)

    # Tags
    for tag in dataset_dict.get('tags', []):
        g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))

    # Dates
    items = [
        ('issued', DCT.issued, ['metadata_created'], Literal),
        ('modified', DCT.modified, ['metadata_modified'], Literal),
    ]
    self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

    # Lists
    items = [
        ('language', DCT.language, None, Literal),
        ('theme', DCAT.theme, None, URIRef),
        ('conforms_to', DCT.conformsTo, None, Literal),
        ('alternate_identifier', ADMS.identifier, None, Literal),
        ('documentation', FOAF.page, None, URIRef),
        ('related_resource', DCT.relation, None, URIRef),
        ('has_version', DCT.hasVersion, None, URIRef),
        ('is_version_of', DCT.isVersionOf, None, URIRef),
        ('source', DCT.source, None, Literal),
        ('sample', ADMS.sample, None, Literal),
    ]
    self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

    # Contact details
    if any([
        self._get_dataset_value(dataset_dict, 'contact_uri'),
        self._get_dataset_value(dataset_dict, 'contact_name'),
        self._get_dataset_value(dataset_dict, 'contact_email'),
        self._get_dataset_value(dataset_dict, 'maintainer'),
        self._get_dataset_value(dataset_dict, 'maintainer_email'),
        self._get_dataset_value(dataset_dict, 'author'),
        self._get_dataset_value(dataset_dict, 'author_email'),
    ]):
        contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri')
        if contact_uri:
            contact_details = URIRef(self._removeWhitespaces(contact_uri))
        else:
            contact_details = BNode()

        g.add((contact_details, RDF.type, VCARD.Organization))
        g.add((dataset_ref, DCAT.contactPoint, contact_details))

        self._add_triple_from_dict(
            dataset_dict, contact_details,
            VCARD.fn, 'contact_name', ['maintainer', 'author']
        )
        # Add mail address as URIRef, and ensure it has a mailto: prefix
        self._add_triple_from_dict(
            dataset_dict, contact_details,
            VCARD.hasEmail, 'contact_email',
            ['maintainer_email', 'author_email'],
            _type=URIRef, value_modifier=self._add_mailto
        )

    # Publisher
    if any([
        self._get_dataset_value(dataset_dict, 'publisher_uri'),
        self._get_dataset_value(dataset_dict, 'publisher_name'),
        dataset_dict.get('organization'),
    ]):
        publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
        if publisher_uri:
            publisher_details = URIRef(
                self._removeWhitespaces(publisher_uri))
        else:
            # No organization nor publisher_uri
            publisher_details = BNode()

        g.add((publisher_details, RDF.type, FOAF.Organization))
        g.add((dataset_ref, DCT.publisher, publisher_details))

        publisher_name = self._get_dataset_value(dataset_dict,
                                                 'publisher_name')
        if not publisher_name and dataset_dict.get('organization'):
            publisher_name = dataset_dict['organization']['title']

        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
        # TODO: It would make sense to fallback these to organization
        # fields but they are not in the default schema and the
        # `organization` object in the dataset_dict does not include
        # custom fields
        items = [
            ('publisher_email', FOAF.mbox, None, Literal),
            ('publisher_url', FOAF.homepage, None, URIRef),
            ('publisher_type', DCT.type, None, URIRef),
        ]
        self._add_triples_from_dict(dataset_dict, publisher_details, items)

    # Temporal
    start = self._get_dataset_value(dataset_dict, 'temporal_start')
    end = self._get_dataset_value(dataset_dict, 'temporal_end')
    if start or end:
        temporal_extent = BNode()

        g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
        if start:
            self._add_date_triple(temporal_extent, SCHEMA.startDate, start)
        if end:
            self._add_date_triple(temporal_extent, SCHEMA.endDate, end)
        g.add((dataset_ref, DCT.temporal, temporal_extent))

    # Spatial
    spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
    spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
    spatial_geom = self._get_dataset_value(dataset_dict, 'spatial')

    if spatial_uri or spatial_text or spatial_geom:
        if spatial_uri:
            spatial_ref = URIRef(self._removeWhitespaces(spatial_uri))
        else:
            spatial_ref = BNode()

        g.add((spatial_ref, RDF.type, DCT.Location))
        g.add((dataset_ref, DCT.spatial, spatial_ref))

        if spatial_text:
            g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text)))

        if spatial_geom:
            # GeoJSON
            g.add((spatial_ref,
                   LOCN.geometry,
                   Literal(spatial_geom, datatype=GEOJSON_IMT)))
            # WKT, because GeoDCAT-AP says so
            try:
                g.add((spatial_ref,
                       LOCN.geometry,
                       Literal(wkt.dumps(json.loads(spatial_geom),
                                         decimals=4),
                               datatype=GSP.wktLiteral)))
            except (TypeError, ValueError, InvalidGeoJSONException):
                pass

    # Resources
    for resource_dict in dataset_dict.get('resources', []):

        distribution = URIRef(
            self._removeWhitespaces(resource_uri(resource_dict)))

        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))

        # Simple values
        items = [
            ('name', DCT.title, None, Literal),
            ('description', DCT.description, None, Literal),
            ('status', ADMS.status, None, URIRef),
            ('rights', DCT.rights, None, URIRef),
            ('license', DCT.license, None, URIRef),
            ('access_url', DCAT.accessURL, None, URIRef),
            ('download_url', DCAT.downloadURL, None, URIRef),
        ]
        self._add_triples_from_dict(resource_dict, distribution, items)

        # Lists
        items = [
            ('documentation', FOAF.page, None, URIRef),
            ('language', DCT.language, None, Literal),
            ('conforms_to', DCT.conformsTo, None, Literal),
        ]
        self._add_list_triples_from_dict(resource_dict, distribution, items)

        # Format
        mimetype = resource_dict.get('mimetype')
        fmt = resource_dict.get('format')

        # IANA media types (either URI or Literal) should be mapped as
        # mediaType. In case format is available and mimetype is not set
        # or identical to format, check which type is appropriate.
        if fmt and (not mimetype or mimetype == fmt):
            if ('iana.org/assignments/media-types' in fmt
                    or not fmt.startswith('http') and '/' in fmt):
                # output format value as dcat:mediaType instead of dct:format
                mimetype = fmt
                fmt = None
            else:
                # Use dct:format
                mimetype = None

        if mimetype:
            if mimetype.startswith('http'):
                g.add((distribution, DCAT.mediaType,
                       URIRef(self._removeWhitespaces(mimetype))))
            else:
                g.add((distribution, DCAT.mediaType, Literal(mimetype)))

        if fmt:
            if fmt.startswith('http'):
                g.add((distribution, DCT['format'],
                       URIRef(self._removeWhitespaces(fmt))))
            else:
                g.add((distribution, DCT['format'], Literal(fmt)))

        # URL fallback and old behavior
        url = resource_dict.get('url')
        download_url = resource_dict.get('download_url')
        access_url = resource_dict.get('access_url')
        # Use url as fallback for access_url if access_url is not set
        # and download_url is not equal
        if (url and ((not (access_url or download_url))
                     or ((not access_url)
                         and (download_url and url != download_url)))):
            self._add_triple_from_dict(resource_dict, distribution,
                                       DCAT.accessURL, 'url', _type=URIRef)

        # Dates
        items = [
            ('issued', DCT.issued, None, Literal),
            ('modified', DCT.modified, None, Literal),
        ]
        self._add_date_triples_from_dict(resource_dict, distribution, items)

        # Numbers
        if resource_dict.get('size'):
            try:
                g.add((distribution, DCAT.byteSize,
                       Literal(float(resource_dict['size']),
                               datatype=XSD.decimal)))
            except (ValueError, TypeError):
                g.add((distribution, DCAT.byteSize,
                       Literal(resource_dict['size'])))

        # Checksum
        if resource_dict.get('hash'):
            checksum = BNode()
            g.add((checksum, RDF.type, SPDX.Checksum))
            g.add((checksum, SPDX.checksumValue,
                   Literal(resource_dict['hash'], datatype=XSD.hexBinary)))

            if resource_dict.get('hash_algorithm'):
                if resource_dict['hash_algorithm'].startswith('http'):
                    g.add((checksum, SPDX.algorithm,
                           URIRef(self._removeWhitespaces(
                               resource_dict['hash_algorithm']))))
                else:
                    g.add((checksum, SPDX.algorithm,
                           Literal(resource_dict['hash_algorithm'])))

            g.add((distribution, SPDX.checksum, checksum))
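The spatial block above only emits the gsp:wktLiteral when geomet can actually parse the stored extra. A minimal standalone sketch of that guarded conversion, with a made-up spatial_extra value and without the rdflib wrapping:

import json
import geomet
from geomet import wkt

# Hypothetical value of a dataset's 'spatial' extra
spatial_extra = '{"type": "Point", "coordinates": [1.187, 41.078]}'
try:
    wkt_literal = wkt.dumps(json.loads(spatial_extra), decimals=4)
except (TypeError, ValueError, geomet.InvalidGeoJSONException):
    wkt_literal = None  # skip the WKT triple, keep only the GeoJSON one
print(wkt_literal)  # POINT (1.1870 41.0780)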
def test_dumps_empty_geometrycollection(self):
    geom = dict(type='GeometryCollection', geometries=[])
    self.assertEqual('GEOMETRYCOLLECTION EMPTY', wkt.dumps(geom))
def test_3d(self):
    # Test for an XYZ/XYM Point:
    pt = dict(type='Point', coordinates=[0.0, -1.0, 2.0])
    expected = WKT['point']['3d']
    self.assertEqual(expected, wkt.dumps(pt))
def geojson2wkt(geojson):
    # Serialize a GeoJSON geometry dict to WKT with six decimal places.
    coordinates = wkt.dumps(geojson, decimals=6)
    return coordinates
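A quick sanity check of the helper above; the sample LineString is made up, and the expected output follows the zero-padded formatting shown in the tests elsewhere in this file:

line = {'type': 'LineString', 'coordinates': [[100.0, 0.0], [101.0, 1.0]]}
print(geojson2wkt(line))
# LINESTRING (100.000000 0.000000, 101.000000 1.000000)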
def test_2d_6_decimals(self):
    pt = dict(type='Point', coordinates=[-10, -77])
    expected = 'POINT (-10.000000 -77.000000)'
    self.assertEqual(expected, wkt.dumps(pt, decimals=6))
import json

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import s3fs
from dictor import dictor
from geomet import wkt

with open('paloalto.geojson', mode='rb') as f:
    d = json.load(f)

results = []
for i in d['features']:
    result = {}
    name = dictor(i, "properties.name:en")
    geom_type = dictor(i, "geometry.type")
    geojson = dictor(i, "geometry")
    # Keep only (multi)polygon features, storing their geometry as WKT
    if geom_type in ['Polygon', 'MultiPolygon']:
        result['name'] = name
        result['coordinates'] = wkt.dumps(geojson, decimals=7)
        results.append(result)

data_frame = pd.DataFrame(results)
table = pa.Table.from_pandas(data_frame)
filesystem = s3fs.S3FileSystem()
path = "s3://s3-path-here"
pq.write_to_dataset(table, root_path=path, compression='gzip',
                    filesystem=filesystem)
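A hedged sketch of the reverse trip: reading the partitioned dataset back and turning the stored WKT strings into GeoJSON dicts again with geomet's wkt.loads. The S3 path is the same placeholder used above.

import pyarrow.parquet as pq
import s3fs
from geomet import wkt

filesystem = s3fs.S3FileSystem()
df = pq.read_table("s3://s3-path-here", filesystem=filesystem).to_pandas()
geometries = [wkt.loads(w) for w in df['coordinates']]  # back to GeoJSON dicts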
def test_4d(self):
    ls = dict(type='LineString',
              coordinates=[[100.0, 0.0, -60.0, 0.1],
                           [101.0, 1.0, -65.25, 0.2]])
    expected = WKT['linestring']['4d']
    self.assertEqual(expected, wkt.dumps(ls))
def graph_from_dataset(self, dataset_dict, dataset_ref):

    g = self.g

    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    # Basic fields
    items = [
        ('title', DCT.title, None),
        ('notes', DCT.description, None),
        ('url', DCAT.landingPage, None),
        ('identifier', DCT.identifier, ['guid', 'id']),
        ('version', OWL.versionInfo, ['dcat_version']),
        ('version_notes', ADMS.versionNotes, None),
        ('frequency', DCT.accrualPeriodicity, None),
        ('access_rights', DCT.accessRights, None),
    ]
    self._add_triples_from_dict(dataset_dict, dataset_ref, items)

    # Tags
    for tag in dataset_dict.get('tags', []):
        g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))

    # Dates
    items = [
        ('issued', DCT.issued, ['metadata_created']),
        ('modified', DCT.modified, ['metadata_modified']),
    ]
    self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

    # Lists
    items = [
        ('language', DCT.language, None),
        ('theme', DCAT.theme, None),
        ('conforms_to', DCT.conformsTo, None),
        ('alternate_identifier', ADMS.identifier, None),
        ('documentation', FOAF.page, None),
        ('related_resource', DCT.relation, None),
        ('has_version', DCT.hasVersion, None),
        ('is_version_of', DCT.isVersionOf, None),
        ('source', DCT.source, None),
        ('sample', ADMS.sample, None),
    ]
    self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

    # Contact details
    if any([
        self._get_dataset_value(dataset_dict, 'contact_uri'),
        self._get_dataset_value(dataset_dict, 'contact_name'),
        self._get_dataset_value(dataset_dict, 'contact_email'),
        self._get_dataset_value(dataset_dict, 'maintainer'),
        self._get_dataset_value(dataset_dict, 'maintainer_email'),
        self._get_dataset_value(dataset_dict, 'author'),
        self._get_dataset_value(dataset_dict, 'author_email'),
    ]):
        contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri')
        if contact_uri:
            contact_details = URIRef(contact_uri)
        else:
            contact_details = BNode()

        g.add((contact_details, RDF.type, VCARD.Organization))
        g.add((dataset_ref, DCAT.contactPoint, contact_details))

        items = [
            ('contact_name', VCARD.fn, ['maintainer', 'author']),
            ('contact_email', VCARD.hasEmail, ['maintainer_email',
                                               'author_email']),
        ]
        self._add_triples_from_dict(dataset_dict, contact_details, items)

    # Publisher
    if any([
        self._get_dataset_value(dataset_dict, 'publisher_uri'),
        self._get_dataset_value(dataset_dict, 'publisher_name'),
        dataset_dict.get('organization'),
    ]):
        publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
        if publisher_uri:
            publisher_details = URIRef(publisher_uri)
        else:
            # No organization nor publisher_uri
            publisher_details = BNode()

        g.add((publisher_details, RDF.type, FOAF.Organization))
        g.add((dataset_ref, DCT.publisher, publisher_details))

        publisher_name = self._get_dataset_value(dataset_dict,
                                                 'publisher_name')
        if not publisher_name and dataset_dict.get('organization'):
            publisher_name = dataset_dict['organization']['title']

        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
        # TODO: It would make sense to fallback these to organization
        # fields but they are not in the default schema and the
        # `organization` object in the dataset_dict does not include
        # custom fields
        items = [
            ('publisher_email', FOAF.mbox, None),
            ('publisher_url', FOAF.homepage, None),
            ('publisher_type', DCT.type, None),
        ]
        self._add_triples_from_dict(dataset_dict, publisher_details, items)

    # Temporal
    start = self._get_dataset_value(dataset_dict, 'temporal_start')
    end = self._get_dataset_value(dataset_dict, 'temporal_end')
    if start or end:
        temporal_extent = BNode()

        g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
        if start:
            self._add_date_triple(temporal_extent, SCHEMA.startDate, start)
        if end:
            self._add_date_triple(temporal_extent, SCHEMA.endDate, end)
        g.add((dataset_ref, DCT.temporal, temporal_extent))

    # Spatial
    spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
    spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
    spatial_geom = self._get_dataset_value(dataset_dict, 'spatial')

    if spatial_uri or spatial_text or spatial_geom:
        if spatial_uri:
            spatial_ref = URIRef(spatial_uri)
        else:
            spatial_ref = BNode()

        g.add((spatial_ref, RDF.type, DCT.Location))
        g.add((dataset_ref, DCT.spatial, spatial_ref))

        if spatial_text:
            g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text)))

        if spatial_geom:
            # GeoJSON
            g.add((spatial_ref,
                   LOCN.geometry,
                   Literal(spatial_geom, datatype=GEOJSON_IMT)))
            # WKT, because GeoDCAT-AP says so
            try:
                g.add((spatial_ref,
                       LOCN.geometry,
                       Literal(wkt.dumps(json.loads(spatial_geom),
                                         decimals=4),
                               datatype=GSP.wktLiteral)))
            except (TypeError, ValueError, InvalidGeoJSONException):
                pass

    # Resources
    for resource_dict in dataset_dict.get('resources', []):

        distribution = URIRef(resource_uri(resource_dict))

        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))

        # Simple values
        items = [
            ('name', DCT.title, None),
            ('description', DCT.description, None),
            ('status', ADMS.status, None),
            ('rights', DCT.rights, None),
            ('license', DCT.license, None),
        ]
        self._add_triples_from_dict(resource_dict, distribution, items)

        # Lists
        items = [
            ('documentation', FOAF.page, None),
            ('language', DCT.language, None),
            ('conforms_to', DCT.conformsTo, None),
        ]
        self._add_list_triples_from_dict(resource_dict, distribution, items)

        # Format
        if '/' in resource_dict.get('format', ''):
            g.add((distribution, DCAT.mediaType,
                   Literal(resource_dict['format'])))
        else:
            if resource_dict.get('format'):
                g.add((distribution, DCT['format'],
                       Literal(resource_dict['format'])))

            if resource_dict.get('mimetype'):
                g.add((distribution, DCAT.mediaType,
                       Literal(resource_dict['mimetype'])))

        # URL
        url = resource_dict.get('url')
        download_url = resource_dict.get('download_url')
        if download_url:
            g.add((distribution, DCAT.downloadURL, Literal(download_url)))
        if (url and not download_url) or (url and url != download_url):
            g.add((distribution, DCAT.accessURL, Literal(url)))

        # Dates
        items = [
            ('issued', DCT.issued, None),
            ('modified', DCT.modified, None),
        ]
        self._add_date_triples_from_dict(resource_dict, distribution, items)

        # Numbers
        if resource_dict.get('size'):
            try:
                g.add((distribution, DCAT.byteSize,
                       Literal(float(resource_dict['size']),
                               datatype=XSD.decimal)))
            except (ValueError, TypeError):
                g.add((distribution, DCAT.byteSize,
                       Literal(resource_dict['size'])))

        # Checksum
        if resource_dict.get('hash'):
            checksum = BNode()
            g.add((checksum, SPDX.checksumValue,
                   Literal(resource_dict['hash'], datatype=XSD.hexBinary)))

            if resource_dict.get('hash_algorithm'):
                if resource_dict['hash_algorithm'].startswith('http'):
                    g.add((checksum, SPDX.algorithm,
                           URIRef(resource_dict['hash_algorithm'])))
                else:
                    g.add((checksum, SPDX.algorithm,
                           Literal(resource_dict['hash_algorithm'])))

            g.add((distribution, SPDX.checksum, checksum))
def test_4d(self):
    poly = dict(type='Polygon',
                coordinates=[[[1, 2, 3, 4], [5, 6, 7, 8],
                              [9, 10, 11, 12], [1, 2, 3, 4]]])
    expected = WKT['polygon']['4d']
    self.assertEqual(expected, wkt.dumps(poly, decimals=0))
def str_encode(self, value):
    return wkt.dumps(self.json_encode(value))
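A plausible counterpart for the encoder above, assuming the same class has a matching json_decode helper (that name is an assumption, not part of the original snippet); geomet's wkt.loads does the WKT-to-GeoJSON step:

def str_decode(self, value):
    # Inverse of str_encode: parse WKT back into a GeoJSON dict, then hand
    # it to the (assumed) json_decode counterpart.
    return self.json_decode(wkt.loads(value))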
def test_basic(self):
    gc = {
        'geometries': [
            {'coordinates': [0.0, 1.0], 'type': 'Point'},
            {'coordinates': [[-100.0, 0.0], [-101.0, -1.0]],
             'type': 'LineString'},
            {'coordinates': [[[100.001, 0.001], [101.1235, 0.001],
                              [101.001, 1.001], [100.001, 0.001]],
                             [[100.201, 0.201], [100.801, 0.201],
                              [100.801, 0.801], [100.201, 0.201]]],
             'type': 'Polygon'},
            {'coordinates': [[100.0, 3.101], [101.0, 2.1], [3.14, 2.18]],
             'type': 'MultiPoint'},
            {'coordinates': [[[0.0, -1.0], [-2.0, -3.0], [-4.0, -5.0]],
                             [[1.66, -31023.5, 1.1], [10000.9999, 3.0, 2.2],
                              [100.9, 1.1, 3.3], [0.0, 0.0, 4.4]]],
             'type': 'MultiLineString'},
            {'coordinates': [[[[100.001, 0.001], [101.001, 0.001],
                               [101.001, 1.001], [100.001, 0.001]],
                              [[100.201, 0.201], [100.801, 0.201],
                               [100.801, 0.801], [100.201, 0.201]]],
                             [[[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
                               [9.0, 10.0, 11.0, 12.0],
                               [1.0, 2.0, 3.0, 4.0]]]],
             'type': 'MultiPolygon'},
        ],
        'type': 'GeometryCollection',
    }
    expected = (
        'GEOMETRYCOLLECTION '
        '(POINT (0.000 1.000),'
        'LINESTRING (-100.000 0.000, -101.000 -1.000),'
        'POLYGON ((100.001 0.001, 101.124 0.001, 101.001 1.001, '
        '100.001 0.001), (100.201 0.201, 100.801 0.201, 100.801 0.801, '
        '100.201 0.201)),'
        'MULTIPOINT ((100.000 3.101), (101.000 2.100), (3.140 2.180)),'
        'MULTILINESTRING ((0.000 -1.000, -2.000 -3.000, -4.000 -5.000), '
        '(1.660 -31023.500 1.100, 10001.000 3.000 2.200, '
        '100.900 1.100 3.300, 0.000 0.000 4.400)),'
        'MULTIPOLYGON (((100.001 0.001, 101.001 0.001, 101.001 1.001, '
        '100.001 0.001), '
        '(100.201 0.201, 100.801 0.201, 100.801 0.801, 100.201 0.201)), '
        '((1.000 2.000 3.000 4.000, 5.000 6.000 7.000 8.000, '
        '9.000 10.000 11.000 12.000, 1.000 2.000 3.000 4.000))))'
    )
    self.assertEqual(expected, wkt.dumps(gc, decimals=3))
""" conversions from/to GeoJSON to WKT/WKB and back for use with socrata https://github.com/larsbutler/geomet """ from geomet import wkt point = {'type': 'Point', 'coordinates': [116.4, 45.2, 11.1]} point_wkt = wkt.dumps(point, decimals=4) print(point_wkt) poly = {'type': 'Polygon', 'coordinates': [ [ [0.0, 0.0], [10.0, 30.0], [30.0, 10.0], [0.0, 0.0] ] ] # I still don't get this additional pair of brackets } poly_wkt = wkt.dumps(poly, decimals=2) print(poly_wkt) # http://pcjericks.github.io/py-gdalogr-cookbook/geometry.html#create-a-linestring #
def generateBounding():
    # Assumes json, re, traceback, osgeo.ogr (as ogr) and geomet.wkt are
    # imported at module level, and that selectDataset, updateDataset and
    # the filenames list are defined elsewhere in this module.
    cursor = selectDataset()
    for row in cursor:
        # e.g. united-kingdom/39bc26ae67cf47f395cdec351c36b43a_0.geojson
        try:
            with open(r"F:/Master Course Materials/Second Semester/"
                      r"Recommender-Implementation/harvested-datasets/"
                      + row[2], "r", encoding="utf8") as myfile:
                dataset = json.loads(myfile.read())

            epsg = 4326
            bbox = [-180, 180, -180, 180]
            if 'crs' in dataset:
                print(dataset['crs']['properties']['name'])
                epsg = int(re.findall(
                    r'\d+', dataset['crs']['properties']['name'])[-1])
                bbox = [-180, 123456789, -180, 123456789]
            else:
                epsg = 4326

            if 'bbox' in dataset:
                bbox = dataset['bbox']
            else:
                polygons = []
                envelopes = []
                for f in dataset["features"]:
                    # Round-trip through WKT so OGR can compute the envelope
                    geom = ogr.CreateGeometryFromWkt(
                        wkt.dumps(f["geometry"], decimals=4))
                    env = geom.GetEnvelope()
                    polygons.append([env[0], env[1], env[2], env[3]])
                    envelopes.append(env)

                for polygon in polygons:
                    if polygon[0] > bbox[0]:
                        bbox[0] = polygon[0]
                    if polygon[1] < bbox[1]:
                        bbox[1] = polygon[1]
                    if polygon[2] > bbox[2]:
                        bbox[2] = polygon[2]
                    if polygon[3] < bbox[3]:
                        bbox[3] = polygon[3]

            # Build an EWKT polygon from the bounding box corners
            poly_wkt = ("SRID=" + str(epsg) + ";" + "Polygon((" +
                        str(bbox[0]) + " " + str(bbox[2]) + "," +
                        str(bbox[0]) + " " + str(bbox[3]) + "," +
                        str(bbox[1]) + " " + str(bbox[3]) + "," +
                        str(bbox[1]) + " " + str(bbox[2]) + "," +
                        str(bbox[0]) + " " + str(bbox[2]) + "))")

            updateDataset(row[1], epsg,
                          "ST_GeomFromEWKT('" + poly_wkt + "')")
            poly_wkt = ""
            epsg = 4326
            bbox = [-180, 180, -180, 180]
        except Exception:
            filenames.append(row[2])
            print(len(filenames))
            traceback.print_exc()
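For comparison, note that the loop above takes the maximum of the per-feature minima and the minimum of the maxima, so it tends toward an intersection-style box. A minimal sketch of the usual union-of-envelopes computation, relying only on OGR's documented GetEnvelope ordering of (minX, maxX, minY, maxY); the function name is illustrative, not part of the original script:

def union_bbox(envelopes):
    # envelopes: iterable of (minX, maxX, minY, maxY) tuples from GetEnvelope()
    min_x = min(env[0] for env in envelopes)
    max_x = max(env[1] for env in envelopes)
    min_y = min(env[2] for env in envelopes)
    max_y = max(env[3] for env in envelopes)
    return [min_x, max_x, min_y, max_y]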