def main():
    """Walk the OSM stream from the stored sequence and track unseen user ids.

    Changes are grouped per ``uid`` until a ``pyosm.model.Finished`` marker
    arrives. At that point every uid missing from the known set is added to
    it and logged, the grouped changes are handed to ``update_feeds`` and the
    known set is passed to ``push_existing_users`` together with the marker's
    sequence. The loop ends once a marker's timestamp is less than 90 seconds
    behind ``datetime.datetime.utcnow()``.
    """
    existing, start_sqn = load_existing_users()
    logger.info("Starting at sequence %s", start_sqn)

    user_to_objects = {}
    for verb, obj in iter_osm_stream(start_sqn=start_sqn):
        if not isinstance(obj, pyosm.model.Finished):
            # Ordinary change: file it under the uid that made it.
            user_to_objects.setdefault(obj.uid, []).append((verb, obj))
            continue

        # A Finished marker: review everything grouped since the last one.
        for uid, changes in user_to_objects.items():
            if uid in existing:
                continue
            existing.add(uid)
            # changes[0] is the first (verb, obj) pair recorded for this uid.
            logger.info(
                "New user %s found in changeset %s", uid, changes[0][1].changeset
            )

        # Only publish and persist when this sequence produced any changes.
        if user_to_objects:
            update_feeds(user_to_objects)
            push_existing_users(existing, obj.sequence)
            user_to_objects = {}

        logger.info("Finished processing sequence %s", obj.sequence)

        # Stop once the marker is under 90 seconds old.
        lag = datetime.datetime.utcnow() - obj.timestamp
        if lag.total_seconds() < 90:
            push_existing_users(existing, obj.sequence)
            logger.info("Done for now. Exiting.")
            break
def iterate_objects(q, lock):
    """Read the OSM stream from ``state_dir='state'`` and feed objects into ``q``.

    Every non-``Finished`` object gets its ``visible`` field set (False when
    the action is ``'delete'``, True otherwise) via ``_replace`` and is put on
    the queue. When a ``Finished`` marker arrives while the module-level
    ``stop`` flag is set, the marker itself is queued and iteration ends.

    :param q: queue-like object; only ``put`` is called on it.
    :param lock: accepted for the caller's benefit; not used in this function.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Objects starting")
    for (action, thing) in iter_osm_stream(state_dir='state'):
        if isinstance(thing, Finished):
            # NOTE(review): nesting reconstructed from a flattened source --
            # confirm that Finished markers seen while `stop` is not set are
            # meant to be dropped rather than queued.
            if stop.isSet():
                q.put(thing)
                break
        else:
            thing = thing._replace(visible=(action != 'delete'))
            q.put(thing)
    print("Objects finished")
kind_buffer = getattr(buffers, kind) getattr(csvs, kind).writerows(kind_buffer) setattr(buffers, kind, []) cut_new_file('changesets') cut_new_file('nodes') cut_new_file('ways') cut_new_file('relations') sys.stdout.write( '%8d changesets, %10d nodes, %10d ways, %10d relations' % (counter.changesets, counter.nodes, counter.ways, counter.relations)) for (verb, p) in iter_osm_stream( start_sqn=143, base_url='http://planet.openstreetmap.org/replication/day', parse_timestamps=False, state_dir='state'): if type(p) == pyosm.model.Node: buffers.nodes.append([ p.id, p.version, p.changeset, p.user, p.uid, False if verb == 'delete' else True, p.timestamp, ','.join([ '"%s"=>"%s"' % (re.escape(tag.key), re.escape(tag.value)) for tag in p.tags ]), '%0.7f, %0.7f' % (p.lon, p.lat) if p.lat else None ]) counter.nodes += 1 if counter.nodes % size_of_buffer == 0:
try: es.indices.delete_index('osm-archive') except: pass es.indices.create_index('osm-archive') es.indices.put_mapping('node', node_mapping, ['osm-archive']) es.indices.put_mapping('way', way_mapping, ['osm-archive']) es.indices.put_mapping('relation', relation_mapping, ['osm-archive']) n = 0 nodes = 0 ways = 0 relations = 0 sys.stdout.write('%10d nodes, %8d ways, %5d relations' % (nodes, ways, relations)) for (verb, p) in iter_osm_stream(): data = { 'id': p.id, 'version': p.version, 'changeset': p.changeset, 'timestamp': p.timestamp, 'user': p.user, 'uid': p.uid } data['tags'] = dict([(tag.key, tag.value) for tag in p.tags]) if type(p) == pyosm.model.Node: data['loc'] = {'lat': p.lat, 'lon': p.lon} es.index(data, 'osm-archive',
# NOTE(review): the statement below looks like the tail of a definition whose
# header lies above this excerpt; it is kept verbatim and its original
# indentation could not be recovered from the flattened source.
kind_csv.writerow(headers[kind])


def write_and_clear_buffer(kind):
    """Write the rows buffered under ``kind`` and reset that buffer.

    :param kind: attribute name looked up on both the module-level ``buffers``
        and ``csvs`` objects (the script below uses 'changesets', 'nodes',
        'ways' and 'relations').
    """
    global csvs, buffers, counter
    kind_buffer = getattr(buffers, kind)
    getattr(csvs, kind).writerows(kind_buffer)
    # Replace the buffer with a fresh list rather than clearing it in place.
    setattr(buffers, kind, [])


cut_new_file('changesets')
cut_new_file('nodes')
cut_new_file('ways')
cut_new_file('relations')

sys.stdout.write('%8d changesets, %10d nodes, %10d ways, %10d relations' %
                 (counter.changesets, counter.nodes, counter.ways, counter.relations))

for (verb, p) in iter_osm_stream(start_sqn=143,
                                 base_url='http://planet.openstreetmap.org/replication/day',
                                 parse_timestamps=False,
                                 state_dir='state'):
    if isinstance(p, pyosm.model.Node):
        # Tags are joined into one comma-separated "key"=>"value" string.
        tag_text = ','.join(
            '"%s"=>"%s"' % (re.escape(tag.key), re.escape(tag.value))
            for tag in p.tags
        )
        # Test against None explicitly: a plain truthiness check would also
        # discard nodes whose latitude is exactly 0.0.
        if p.lat is not None and p.lon is not None:
            location = '%0.7f, %0.7f' % (p.lon, p.lat)
        else:
            location = None
        buffers.nodes.append([
            p.id,
            p.version,
            p.changeset,
            p.user,
            p.uid,
            verb != 'delete',  # False for 'delete' verbs, True otherwise
            p.timestamp,
            tag_text,
            location,
        ])
        counter.nodes += 1
# Recreate the 'osm-archive' index from scratch and register one mapping per
# element type before streaming objects into it.
try:
    es.indices.delete_index('osm-archive')
except Exception:
    # Best effort: a failed delete (presumably because the index does not
    # exist yet) is not fatal. Unlike a bare ``except``, this still lets
    # KeyboardInterrupt and SystemExit propagate.
    pass
es.indices.create_index('osm-archive')
es.indices.put_mapping('node', node_mapping, ['osm-archive'])
es.indices.put_mapping('way', way_mapping, ['osm-archive'])
es.indices.put_mapping('relation', relation_mapping, ['osm-archive'])

# Counters; only `nodes` is updated in the part of the loop shown here.
n = 0
nodes = 0
ways = 0
relations = 0
sys.stdout.write('%10d nodes, %8d ways, %5d relations' % (nodes, ways, relations))

for (verb, p) in iter_osm_stream():
    # Fields read from every streamed object.
    data = {
        'id': p.id,
        'version': p.version,
        'changeset': p.changeset,
        'timestamp': p.timestamp,
        'user': p.user,
        'uid': p.uid
    }
    data['tags'] = {tag.key: tag.value for tag in p.tags}
    if isinstance(p, pyosm.model.Node):
        data['loc'] = {'lat': p.lat, 'lon': p.lon}
        # The document id combines element id and version.
        es.index(data, 'osm-archive', 'node', '%s.%s' % (p.id, p.version), bulk=True)
        nodes += 1