def process_map(file_in, validate):
    """Shape every node/relation/way element of an OSM file and write it out.

    One output file is opened per path constant in ``osmv`` and wrapped in a
    ``UnicodeDictWriter``; each writer gets a header row before any data.

    Args:
        file_in: passed straight to ``osmf.get_element`` as the input source.
        validate: validation via ``validate_element`` runs only when this is
            exactly ``True`` (identity check, not mere truthiness).
    """
    with codecs.open(osmv.NODES_PATH, 'w') as nodes_file, \
         codecs.open(osmv.NODE_TAGS_PATH, 'w') as nodes_tags_file, \
         codecs.open(osmv.RELATIONS_PATH, 'w') as relations_file, \
         codecs.open(osmv.RELATION_NODES_PATH, 'w') as relation_nodes_file, \
         codecs.open(osmv.RELATION_RELATIONS_PATH, 'w') as relation_relations_file, \
         codecs.open(osmv.RELATION_TAGS_PATH, 'w') as relation_tags_file, \
         codecs.open(osmv.RELATION_WAYS_PATH, 'w') as relation_ways_file, \
         codecs.open(osmv.WAYS_PATH, 'w') as ways_file, \
         codecs.open(osmv.WAY_NODES_PATH, 'w') as way_nodes_file, \
         codecs.open(osmv.WAY_TAGS_PATH, 'w') as way_tags_file:

        # One writer per output file, created in the same order as before.
        node_out = UnicodeDictWriter(nodes_file, osmv.NODE_FIELDS)
        node_tag_out = UnicodeDictWriter(nodes_tags_file, osmv.NODE_TAGS_FIELDS)
        rel_out = UnicodeDictWriter(relations_file, osmv.RELATION_FIELDS)
        rel_node_out = UnicodeDictWriter(relation_nodes_file,
                                         osmv.RELATION_NODES_FIELDS)
        rel_rel_out = UnicodeDictWriter(relation_relations_file,
                                        osmv.RELATION_RELATIONS_FIELDS)
        rel_tag_out = UnicodeDictWriter(relation_tags_file,
                                        osmv.RELATION_TAGS_FIELDS)
        rel_way_out = UnicodeDictWriter(relation_ways_file,
                                        osmv.RELATION_WAYS_FIELDS)
        way_out = UnicodeDictWriter(ways_file, osmv.WAY_FIELDS)
        way_node_out = UnicodeDictWriter(way_nodes_file, osmv.WAY_NODES_FIELDS)
        way_tag_out = UnicodeDictWriter(way_tags_file, osmv.WAY_TAGS_FIELDS)

        # Header rows first, in writer-creation order.
        for out in (node_out, node_tag_out, rel_out, rel_node_out,
                    rel_rel_out, rel_tag_out, rel_way_out, way_out,
                    way_node_out, way_tag_out):
            out.writeheader()

        # For each element tag: the (write call, key in the shaped dict)
        # pairs to run, in order. The parent row uses writerow, child
        # collections use writerows.
        sinks_by_tag = {
            'node': (
                (node_out.writerow, 'node'),
                (node_tag_out.writerows, 'node_tags'),
            ),
            'relation': (
                (rel_out.writerow, 'relation'),
                (rel_node_out.writerows, 'relation_nodes'),
                (rel_rel_out.writerows, 'relation_relations'),
                (rel_tag_out.writerows, 'relation_tags'),
                (rel_way_out.writerows, 'relation_ways'),
            ),
            'way': (
                (way_out.writerow, 'way'),
                (way_node_out.writerows, 'way_nodes'),
                (way_tag_out.writerows, 'way_tags'),
            ),
        }

        validator = cerberus.Validator()

        for element in osmf.get_element(file_in,
                                        tags=('node', 'relation', 'way')):
            shaped = shape_element(element)
            if not shaped:
                # Nothing to write for this element.
                continue

            if validate is True:
                validate_element(shaped, validator)

            # Any other tag has no sinks and is silently skipped.
            for write, key in sinks_by_tag.get(element.tag, ()):
                write(shaped[key])
def audit():
    """Audit every zip code tag in the OSM file and report the outcome.

    Walks the elements yielded by ``osmf.get_element(osmv.OSM_PATH)``, hands
    the ``v`` attribute of each tag accepted by ``osmf.is_zipcode`` to
    ``audit_zipcode``, then displays the audit result and the elapsed time.
    The timer stops before the result is displayed.
    """
    # Single-argument parenthesised print emits the same text as the
    # print statement.
    print("Auditing zip codes in " + osmv.OSM_PATH)

    started_at = time.time()
    for element in osmf.get_element(osmv.OSM_PATH):
        for tag in element.iter("tag"):
            if not osmf.is_zipcode(tag):
                continue
            audit_zipcode(tag.attrib['v'])
    elapsed = time.time() - started_at

    display_audit_zipcodes_result()
    print("Time elapsed: " + str(elapsed) + " seconds")
def cleaning():
    """Clean each street name in the OSM file, then audit the cleaned value.

    Every tag accepted by ``osmf.is_street_name`` has its ``v`` attribute run
    through ``clean_street_name`` and the result passed to
    ``audit.audit_street_name``. Afterwards the audit result and the elapsed
    time are displayed; the timer stops before the result is displayed.
    """
    # Single-argument parenthesised print emits the same text as the
    # print statement.
    print("Cleaning and auditing street names in " + osmv.OSM_PATH)

    started_at = time.time()
    for element in osmf.get_element(osmv.OSM_PATH):
        for tag in element.iter("tag"):
            if not osmf.is_street_name(tag):
                continue
            cleaned = clean_street_name(tag.attrib['v'])
            audit.audit_street_name(cleaned)
    elapsed = time.time() - started_at

    audit.display_audit_street_name_result()
    print("Time elapsed: " + str(elapsed) + " seconds")
def clean():
    """Clean each zip code in the OSM file, then audit the cleaned value.

    Every tag accepted by ``osmf.is_zipcode`` has its ``v`` attribute run
    through ``clean_zipcode``; only a truthy result is forwarded to
    ``audit.audit_zipcode``. Afterwards the audit result and the elapsed time
    are displayed; the timer stops before the result is displayed.
    """
    # Single-argument parenthesised print emits the same text as the
    # print statement.
    print("Cleaning and auditing zip codes in " + osmv.OSM_PATH)

    started_at = time.time()
    for element in osmf.get_element(osmv.OSM_PATH):
        for tag in element.iter("tag"):
            if not osmf.is_zipcode(tag):
                continue
            cleaned = clean_zipcode(tag.attrib['v'])
            if cleaned:
                # A falsy cleaning result is left out of the audit.
                audit.audit_zipcode(cleaned)
    elapsed = time.time() - started_at

    audit.display_audit_zipcodes_result()
    print("Time elapsed: " + str(elapsed) + " seconds")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Write a smaller sample of the OSM file by keeping every k-th element.

Reads the top-level elements yielded by ``osmf.get_element(osmv.OSM_PATH)``
and copies every ``k``-th one, wrapped in an ``<osm>`` root, to
``SAMPLE_FILE``.
"""

import xml.etree.ElementTree as ET  # Use cElementTree or lxml if too slow

import osm_functions as osmf
import osm_variables as osmv

SAMPLE_FILE = "dallas_sample.osm"
k = 150  # Parameter: take every k-th top level element

# The file is opened in binary mode and ET.tostring(..., encoding='utf-8')
# yields a byte string, so the fixed markup is written as bytes literals too.
# On Python 2 a b'...' literal is a plain str (output unchanged); on Python 3
# it avoids the TypeError raised when writing str to a binary file.
with open(SAMPLE_FILE, 'wb') as output:
    output.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
    output.write(b'<osm>\n ')

    # Write every kth top level element
    for i, element in enumerate(osmf.get_element(osmv.OSM_PATH)):
        if i % k == 0:
            output.write(ET.tostring(element, encoding='utf-8'))

    output.write(b'</osm>')