Example #1
0
def main():
    """Follow the OSM replication stream and report users not seen before.

    Loads the set of known user ids and the sequence number to resume from,
    then consumes ``iter_osm_stream``.  Ordinary objects are grouped by the
    uid that touched them; each ``pyosm.model.Finished`` marker closes a
    sequence, at which point unseen uids are logged and recorded, the feeds
    are updated and the known-user set is pushed.  The loop stops once a
    finished sequence is less than 90 seconds old.
    """
    existing, start_sqn = load_existing_users()
    user_to_objects = {}

    logger.info("Starting at sequence %s", start_sqn)

    for verb, obj in iter_osm_stream(start_sqn=start_sqn):
        if not isinstance(obj, pyosm.model.Finished):
            # Regular object: remember it under the uid that changed it.
            user_to_objects.setdefault(obj.uid, []).append((verb, obj))
            continue

        # End-of-sequence marker: record and report every uid we have not
        # seen before, citing the changeset of that user's first change.
        for uid in user_to_objects:
            if uid in existing:
                continue
            existing.add(uid)
            _, first_obj = user_to_objects[uid][0]
            logger.info(
                "New user %s found in changeset %s",
                uid, first_obj.changeset
            )

        # NOTE(review): the feeds receive every user touched in this
        # sequence, not only the newly seen ones -- confirm that
        # update_feeds does its own filtering.
        if user_to_objects:
            update_feeds(user_to_objects)
            push_existing_users(existing, obj.sequence)
            user_to_objects = {}

        logger.info("Finished processing sequence %s", obj.sequence)

        # A sequence younger than 90 seconds means we have caught up with
        # the stream: persist the state once more and stop.
        age = datetime.datetime.utcnow() - obj.timestamp
        if age.total_seconds() < 90:
            push_existing_users(existing, obj.sequence)
            logger.info("Done for now. Exiting.")
            break
Example #2
0
def iterate_objects(q, lock):
    """Feed objects from the OSM replication stream into the queue *q*.

    Every streamed object is put on *q*.  Regular objects first get their
    ``visible`` field set: False for a 'delete' action, True otherwise.
    ``Finished`` markers are forwarded unchanged; when the module-level
    ``stop`` event is set at such a marker, the marker is queued and the
    loop ends.

    Args:
        q: queue-like object exposing ``put``.
        lock: accepted for interface compatibility; not used in this body.
    """
    # print(...) with a single string behaves identically on Python 2 and 3.
    print("Objects starting")
    for (action, thing) in iter_osm_stream(state_dir='state'):
        if isinstance(thing, Finished):
            # Only stop at a sequence boundary so no sequence is cut short.
            if stop.isSet():
                q.put(thing)
                break
        else:
            thing = thing._replace(visible=(action != 'delete'))
        q.put(thing)
    print("Objects finished")
Example #3
0
    kind_buffer = getattr(buffers, kind)
    getattr(csvs, kind).writerows(kind_buffer)
    setattr(buffers, kind, [])


# Call cut_new_file once per object kind before streaming begins
# (cut_new_file is defined outside this excerpt).
cut_new_file('changesets')
cut_new_file('nodes')
cut_new_file('ways')
cut_new_file('relations')

# Emit the initial progress line with the current counter values.
sys.stdout.write(
    '%8d changesets, %10d nodes, %10d ways, %10d relations' %
    (counter.changesets, counter.nodes, counter.ways, counter.relations))

# Stream the daily replication diffs from sequence 143 onwards and buffer
# one CSV row per node.
for (verb, p) in iter_osm_stream(
        start_sqn=143,
        base_url='http://planet.openstreetmap.org/replication/day',
        parse_timestamps=False,
        state_dir='state'):

    if isinstance(p, pyosm.model.Node):
        buffers.nodes.append([
            p.id, p.version, p.changeset, p.user, p.uid,
            # "visible" column: a delete action yields False.
            verb != 'delete', p.timestamp,
            # NOTE(review): re.escape is a regex escaper; confirm it matches
            # the quoting rules the consumer of this "k"=>"v" column expects.
            ','.join([
                '"%s"=>"%s"' % (re.escape(tag.key), re.escape(tag.value))
                for tag in p.tags
            ]),
            # Compare against None explicitly: latitude 0.0 is a valid
            # coordinate and must not be treated as "no location".
            '%0.7f, %0.7f' % (p.lon, p.lat) if p.lat is not None else None
        ])
        counter.nodes += 1

        if counter.nodes % size_of_buffer == 0:
Example #4
0
# Recreate the 'osm-archive' index from scratch.  Deleting the old index is
# best effort (presumably it may not exist yet -- confirm), so failures are
# ignored; catching Exception rather than using a bare except lets
# KeyboardInterrupt and SystemExit propagate.
try:
    es.indices.delete_index('osm-archive')
except Exception:
    pass
es.indices.create_index('osm-archive')
es.indices.put_mapping('node', node_mapping, ['osm-archive'])
es.indices.put_mapping('way', way_mapping, ['osm-archive'])
es.indices.put_mapping('relation', relation_mapping, ['osm-archive'])

# Progress counters, printed once before streaming starts.
n = 0
nodes = 0
ways = 0
relations = 0
sys.stdout.write('%10d nodes, %8d ways, %5d relations' %
                 (nodes, ways, relations))
for (verb, p) in iter_osm_stream():
    data = {
        'id': p.id,
        'version': p.version,
        'changeset': p.changeset,
        'timestamp': p.timestamp,
        'user': p.user,
        'uid': p.uid
    }

    data['tags'] = dict([(tag.key, tag.value) for tag in p.tags])

    if type(p) == pyosm.model.Node:
        data['loc'] = {'lat': p.lat, 'lon': p.lon}
        es.index(data,
                 'osm-archive',
Example #5
0
    kind_csv.writerow(headers[kind])

def write_and_clear_buffer(kind):
    """Write the rows buffered for *kind* to its CSV writer and reset the buffer.

    *kind* names an attribute present on both the module-level ``buffers``
    and ``csvs`` objects (e.g. 'nodes').
    """
    pending_rows = getattr(buffers, kind)
    writer = getattr(csvs, kind)
    writer.writerows(pending_rows)
    # Rebind to a fresh list instead of clearing the old one in place.
    setattr(buffers, kind, [])

# Call cut_new_file once per object kind before streaming begins.
cut_new_file('changesets')
cut_new_file('nodes')
cut_new_file('ways')
cut_new_file('relations')

# Emit the initial progress line with the current counter values.
sys.stdout.write('%8d changesets, %10d nodes, %10d ways, %10d relations' % (counter.changesets, counter.nodes, counter.ways, counter.relations))

# Stream the daily replication diffs from sequence 143 onwards and buffer
# one CSV row per node.
for (verb, p) in iter_osm_stream(start_sqn=143, base_url='http://planet.openstreetmap.org/replication/day', parse_timestamps=False, state_dir='state'):

    if isinstance(p, pyosm.model.Node):
        buffers.nodes.append([
            p.id,
            p.version,
            p.changeset,
            p.user,
            p.uid,
            # "visible" column: a delete action yields False.
            verb != 'delete',
            p.timestamp,
            # NOTE(review): re.escape is a regex escaper; confirm it matches
            # the quoting rules the consumer of this "k"=>"v" column expects.
            ','.join(['"%s"=>"%s"' % (re.escape(tag.key), re.escape(tag.value)) for tag in p.tags]),
            # Compare against None explicitly: latitude 0.0 is a valid
            # coordinate and must not be treated as "no location".
            '%0.7f, %0.7f' % (p.lon, p.lat) if p.lat is not None else None
        ])
        counter.nodes += 1
Example #6
0
# Recreate the 'osm-archive' index from scratch.  Deleting the old index is
# best effort (presumably it may not exist yet -- confirm), so failures are
# ignored; catching Exception rather than using a bare except lets
# KeyboardInterrupt and SystemExit propagate.
try:
    es.indices.delete_index('osm-archive')
except Exception:
    pass
es.indices.create_index('osm-archive')
es.indices.put_mapping('node', node_mapping, ['osm-archive'])
es.indices.put_mapping('way', way_mapping, ['osm-archive'])
es.indices.put_mapping('relation', relation_mapping, ['osm-archive'])

# Progress counters, printed once before streaming starts.
n = 0
nodes = 0
ways = 0
relations = 0
sys.stdout.write('%10d nodes, %8d ways, %5d relations' % (nodes, ways, relations))
# Index every streamed object into 'osm-archive'.  The fields shared by all
# object kinds are collected first; kind-specific fields are added below.
for (verb, p) in iter_osm_stream():
    data = {
        'id': p.id,
        'version': p.version,
        'changeset': p.changeset,
        'timestamp': p.timestamp,
        'user': p.user,
        'uid': p.uid,
        # Flatten the tag objects into a plain key -> value mapping.
        'tags': dict((tag.key, tag.value) for tag in p.tags),
    }

    # isinstance instead of an exact type() comparison, so subclasses of
    # Node are handled as nodes too.
    if isinstance(p, pyosm.model.Node):
        data['loc'] = {'lat': p.lat, 'lon': p.lon}
        # The document id combines object id and version, giving each
        # version of a node its own document.
        es.index(data, 'osm-archive', 'node', '%s.%s' % (p.id, p.version), bulk=True)
        nodes += 1