def iter_changeset_stream(
        start_sqn=None,
        base_url='https://planet.openstreetmap.org/replication/changesets',
        expected_interval=60,
        parse_timestamps=True,
        state_dir=None):
    """Follow an OSM changeset replication stream and yield its contents.

    Replication files are fetched one sequence number at a time. Every
    ``<changeset>`` element is yielded as a ``model.Changeset`` (not a tuple),
    and once a file has been fully parsed a
    ``model.Finished(sequence_number, None)`` marker is yielded. The generator
    never finishes on its own: there is no per-file state document to consult,
    so it simply requests the next file and polls while the server answers 404.

    Args:
        start_sqn: Sequence number of the first file to fetch. Any falsy value
            (``None``, ``0``, ``''``) means "use the sequence reported by
            ``<base_url>/state.yaml``". Ignored when ``state_dir`` already
            holds a ``state.yaml``.
        base_url: Root URL of the changeset replication directory.
        expected_interval: Not used by this function; kept so existing callers
            that pass it keep working.
        parse_timestamps: When true, ``created_at`` / ``closed_at`` are passed
            through ``isoToDatetime``; otherwise the raw attribute values are
            handed to ``model.Changeset``.
        state_dir: Optional directory used to persist progress. If it contains
            ``state.yaml`` the stream resumes from the sequence stored there,
            and after each finished file the next sequence number to fetch is
            written back to that file.

    Yields:
        ``model.Changeset`` objects, followed by one ``model.Finished`` marker
        per replication file.

    Raises:
        Exception: If ``state_dir`` is given but does not exist.
        requests.HTTPError: If the server answers with an error status other
            than the 404 that signals "file not published yet".
    """
    state_path = ('%s/state.yaml' % state_dir) if state_dir else None

    def _timestamp(raw):
        # Optionally convert an ISO timestamp attribute; raw value otherwise.
        return isoToDatetime(raw) if parse_timestamps else raw

    # A state file left behind by a previous run wins over the start_sqn
    # argument, so restarting with the same arguments resumes the stream.
    if state_dir:
        if not os.path.exists(state_dir):
            raise Exception('Specified state_dir "%s" doesn\'t exist.' % state_dir)

        if os.path.exists(state_path):
            with open(state_path, 'r') as f:
                state = readState(f, ': ')
                start_sqn = state['sequence']

    # Without a starting point, begin at the most recent changeset file.
    if not start_sqn:
        response = requests.get('%s/state.yaml' % base_url)
        response.raise_for_status()
        state = readState(response.text, ': ')
        sequence_number = int(state['sequence'])
    else:
        sequence_number = int(start_sqn)

    while True:
        # Sequence 1234567 lives at <base_url>/001/234/567.osm.gz.
        sqn_str = str(sequence_number).zfill(9)
        url = '%s/%s/%s/%s.osm.gz' % (
            base_url, sqn_str[0:3], sqn_str[3:6], sqn_str[6:9])

        # A 404 means the file has not been published yet: back off
        # (1s, 2s, 4s, 8s, then capped at 13s) and ask again.
        delay = 1.0
        while True:
            response = requests.get(url)
            if response.status_code != 404:
                break
            time.sleep(delay)
            delay = min(delay * 2, 13)

        # Any other error status must not be mistaken for replication data;
        # otherwise the error page would be fed to gzip and fail obscurely.
        response.raise_for_status()

        gzipper = gzip.GzipFile(fileobj=io.BytesIO(response.content))

        obj = None
        for event, elem in etree.iterparse(gzipper, events=('start', 'end')):
            if event == 'start':
                if elem.tag == 'changeset':
                    obj = model.Changeset(
                        int(elem.attrib['id']),
                        _timestamp(elem.attrib.get('created_at')),
                        _timestamp(elem.attrib.get('closed_at')),
                        maybeBool(elem.attrib['open']),
                        maybeFloat(elem.get('min_lat')),
                        maybeFloat(elem.get('max_lat')),
                        maybeFloat(elem.get('min_lon')),
                        maybeFloat(elem.get('max_lon')),
                        elem.attrib.get('user'),
                        maybeInt(elem.attrib.get('uid')),
                        [])
                elif elem.tag == 'tag':
                    obj.tags.append(
                        model.Tag(elem.attrib['k'], elem.attrib['v']))
            elif event == 'end':
                if elem.tag == 'changeset':
                    yield obj
                    obj = None

                # Release parsed elements, as iter_osm_file does, so the
                # tree does not accumulate while the file is streamed.
                elem.clear()
                while elem.getprevious() is not None:
                    del elem.getparent()[0]

        yield model.Finished(sequence_number, None)

        sequence_number += 1

        # Persist the *next* sequence to fetch so a restart resumes here.
        if state_dir:
            with open(state_path, 'w') as f:
                f.write('sequence: %d' % sequence_number)
def iter_osm_file(f, parse_timestamps=True):
    """Stream OSM primitives out of a file-like object containing OSM XML.

    The document is parsed incrementally. A ``model.Node``, ``model.Way``,
    ``model.Relation`` or ``model.Changeset`` is created when its opening tag
    is seen, child ``tag`` / ``nd`` / ``member`` elements are appended to the
    primitive currently being built, and the primitive is yielded when its
    closing tag is reached.

    Args:
        f: File-like object handed to ``etree.iterparse``.
        parse_timestamps: When true, timestamp attributes are passed through
            ``isoToDatetime``; otherwise the raw attribute values are used.

    Yields:
        One model object per ``node``, ``way``, ``relation`` or ``changeset``
        element, in document order.
    """
    # Elements whose closing tag completes a primitive.
    primitive_tags = ('node', 'way', 'relation', 'changeset')

    def timestamp_of(attrs, key):
        # Optionally convert the named timestamp attribute.
        raw = attrs.get(key)
        return isoToDatetime(raw) if parse_timestamps else raw

    def versioned_fields(el):
        # Leading constructor arguments shared by Node, Way and Relation.
        attrs = el.attrib
        return (
            int(attrs['id']),
            maybeInt(el.get('version')),
            maybeInt(el.get('changeset')),
            attrs.get('user'),
            maybeInt(attrs.get('uid')),
            maybeBool(attrs.get('visible')),
            timestamp_of(attrs, 'timestamp'),
        )

    current = None
    for event, el in etree.iterparse(f, events=('start', 'end')):
        kind = el.tag

        if event == 'start':
            attrs = el.attrib
            if kind == 'tag':
                current.tags.append(model.Tag(attrs['k'], attrs['v']))
            elif kind == 'nd':
                current.nds.append(int(attrs['ref']))
            elif kind == 'member':
                current.members.append(
                    model.Member(attrs['type'], int(attrs['ref']), attrs['role']))
            elif kind == 'node':
                shared = versioned_fields(el)
                position = (maybeFloat(el.get('lat')), maybeFloat(el.get('lon')))
                current = model.Node(*(shared + position + ([],)))
            elif kind == 'way':
                current = model.Way(*(versioned_fields(el) + ([], [])))
            elif kind == 'relation':
                current = model.Relation(*(versioned_fields(el) + ([], [])))
            elif kind == 'changeset':
                current = model.Changeset(
                    int(attrs['id']),
                    timestamp_of(attrs, 'created_at'),
                    timestamp_of(attrs, 'closed_at'),
                    maybeBool(attrs['open']),
                    maybeFloat(el.get('min_lat')),
                    maybeFloat(el.get('max_lat')),
                    maybeFloat(el.get('min_lon')),
                    maybeFloat(el.get('max_lon')),
                    attrs.get('user'),
                    maybeInt(attrs.get('uid')),
                    [])

        elif event == 'end':
            if kind in primitive_tags:
                yield current
                current = None

            # Drop the finished element and any already-processed siblings so
            # memory stays flat while streaming large files.
            el.clear()
            while el.getprevious() is not None:
                del el.getparent()[0]