-
Notifications
You must be signed in to change notification settings - Fork 0
/
osmtrees2couchdb.py
139 lines (115 loc) · 4.98 KB
/
osmtrees2couchdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# -*- coding: utf-8 -*-
import xml.parsers.expat
import mpcouch
import bz2
# import eventlet
# Scratch state shared with the parser callbacks inside elementReader: once a
# <node> element has started it holds [node attributes, {tag key: tag value}].
currentTreeData = list()
# Per-element-type counters. The statements that would increment them are
# currently commented out (as is a general "entries" counter), so they stay 0.
nodes = ways = relations = 0
class _StopParsing(Exception):
    """Raised by the end-element handler to abort parsing after the node section."""


def elementReader(filename, couchurl="http://gi88.geoinfo.tuwien.ac.at:5984/osmnodesvienna"):
    """Parse an OSM XML dump and push every node to CouchDB.

    Each ``<node>`` element is collected as a two-item list
    ``[node attributes, {tag key: tag value}]`` and handed to the
    ``mpcouch`` pusher wrapped as ``{'data': <that list>}``.

    Parsing stops as soon as the first ``<way>`` element has been closed;
    nothing after that point is read. Unlike earlier versions, this no
    longer terminates the interpreter: the function simply returns.

    Args:
        filename: Path of the dump. A name ending in ``bz2`` is opened with
            ``bz2.open``; anything else is read as a plain file.
        couchurl: URL of the target CouchDB database, passed unchanged to
            ``mpcouch.mpcouchPusher``.

    Raises:
        xml.parsers.expat.ExpatError: If the input is not well-formed XML.
        OSError: If the file cannot be opened or read.
    """
    couchPusher = mpcouch.mpcouchPusher(couchurl, 30000, threads=False, jobsbuffersizemax=20)

    def gotCompleteEntry(entry):
        """Hand one finished node record to the pusher."""
        couchPusher.pushData({'data': entry})

    def start_osm_element(name, attrs):
        """expat start-tag handler: open a node record or add a tag to it."""
        global currentTreeData
        if name == "node":
            # Fresh record: node attributes first, then an empty tag mapping.
            currentTreeData = [attrs, {}]
        elif name == "tag":
            # Only collect tags while a node is open. Tags that belong to
            # other elements (e.g. the first <way>) used to be written into
            # the tag dict of the previous, already pushed node, and a tag
            # appearing before any node raised IndexError.
            if currentTreeData:
                currentTreeData[1][attrs[u'k']] = attrs[u'v']
        else:
            print("uncatched element: {}".format(name))

    def end_osm_element(name):
        """expat end-tag handler: push finished nodes, stop at the first way."""
        global currentTreeData
        if name == "node":
            # Every node is pushed. Two filters existed here previously and
            # were disabled: one kept only tag-less nodes and nodes tagged
            # natural=tree (remembering ids whose version != 1), the other
            # additionally kept any node whose id had been remembered.
            gotCompleteEntry(currentTreeData)
            # Close the record so that later non-node tags cannot touch it.
            currentTreeData = []
        elif name == "way":
            # HACK: only nodes are wanted, so bail out at the first closed
            # way. This relies on the dump listing its nodes before any way
            # -- TODO confirm for the input files in use.
            raise _StopParsing
        elif name in ("relation", "tag"):
            pass
        else:
            print("Unknown element: {}".format(name))

    osmParser = xml.parsers.expat.ParserCreate()
    osmParser.StartElementHandler = start_osm_element
    osmParser.EndElementHandler = end_osm_element

    # Same extension test as before; only the opener differs between formats.
    opener = bz2.open if filename[-3:] == 'bz2' else open
    try:
        with opener(filename, 'rb') as osmFile:
            print("start parsing")
            osmParser.ParseFile(osmFile)
            print("finished parsing")
    except _StopParsing:
        # Deliberate early exit requested by end_osm_element; the file has
        # already been closed by the with-block.
        pass
    # Presumably flushes whatever the pusher still buffers -- confirm in mpcouch.
    couchPusher.finish()
if __name__ == '__main__':
    import sys

    # Script entry point: import one OSM dump. The dump path may be given as
    # the first command-line argument; without one, the original hard-coded
    # Vienna full-history file is used.
    if len(sys.argv) > 1:
        dumpPath = sys.argv[1]
    else:
        dumpPath = "/datenspeicher/OSM_full_history/vienna-history-151207.osh.bz2"
    print("running import")
    elementReader(dumpPath)
    print("finished import")