def audit(osmfile):
    '''
    Audits the street names found in the given file.

    The file is streamed through ET.iterparse. For every "node" and "way"
    element a (street_name, street_type) pair is obtained from
    get_street_name_and_type, and two findings are collected:

      * street types that are not None and not present in `expected`,
        each mapped to the set of street names seen with that type;
      * street names that are not None and differ from the result of
        normalize_name applied to them.

    Returns the tuple (street_types, unnormalized_street_names).
    '''
    unexpected_types = defaultdict(set)
    non_normalized = set()

    for _, elem in logging_itr(ET.iterparse(osmfile)):
        if elem.tag in ("node", "way"):
            name, kind = get_street_name_and_type(elem)

            # Street type outside the expected set: remember an example name.
            unexpected = kind is not None and kind not in expected
            if unexpected:
                unexpected_types[kind].add(name)

            # Street name that normalization would change.
            if name is not None:
                if name != normalize_name(name):
                    non_normalized.add(name)

        # Release processed elements to limit memory use. "tag" elements are
        # kept; presumably the enclosing node/way still needs to read them
        # when it is processed later -- confirm against
        # get_street_name_and_type.
        if elem.tag != 'tag':
            elem.clear()

    return unexpected_types, non_normalized
def count_tags(filename):
    '''
    Tallies how often each element tag occurs in the given file.

    Returns a plain dict mapping each tag to its number of occurrences.
    '''
    tally = defaultdict(int)

    for _, node in logging_itr(ET.iterparse(filename)):
        tally[node.tag] = tally[node.tag] + 1
        # The element is no longer needed once counted; drop its contents.
        node.clear()

    # Hand back an ordinary dict rather than the defaultdict.
    return dict(tally)
def process_map(filename):
    '''
    Collects the distinct user ids found in the given file.

    Every parsed element is passed to get_user; each result that is not
    None is added to the returned set.
    '''
    contributors = set()
    parsed = logging_itr(ET.iterparse(filename))

    for _, node in parsed:
        uid = get_user(node)
        if uid is not None:
            contributors.add(uid)
        # Drop the element's contents once its user has been looked up.
        node.clear()

    return contributors
def process_map(filename):
    '''
    Counts, per key type, how many elements of the given file fall into
    that type.

    Each parsed element is classified with key_type; elements for which
    key_type returns None are not counted. Returns a Counter mapping each
    key type to its count.
    '''
    tally = Counter()

    # NOTE(review): parsed elements are never cleared in this loop, so the
    # whole tree stays in memory while iterating -- confirm this is intended.
    for _, node in logging_itr(ET.iterparse(filename)):
        category = key_type(node)
        if category is None:
            continue
        tally[category] += 1

    return tally
def process_map(file_in):
    '''
    Converts the given XML file to a file of JSON documents, one per line.

    Every parsed element is passed to shape_element; each truthy result is
    serialized with json.dumps and written on its own line. The output file
    is named by appending ".json" to basename(file_in). Nothing is returned.
    '''
    out_path = "{0}.json".format(basename(file_in))

    with codecs.open(out_path, "w") as sink:
        for _, node in logging_itr(ET.iterparse(file_in)):
            shaped = shape_element(node)
            if shaped:
                line = json.dumps(shaped) + "\n"
                sink.write(line)

            # Release processed elements to limit memory use. "tag" and "nd"
            # elements are kept; presumably shape_element reads them later
            # through their parent element -- confirm against shape_element.
            if node.tag not in ('tag', 'nd'):
                node.clear()