Example #1
0
def audit(osmfile):
    '''
    Audits the street names found on the node and way elements of the given
    file. Returns a tuple (street_types, unnormalized_street_names):

      - street_types maps every street type that is not in `expected` to the
        set of street names that were seen with that type;
      - unnormalized_street_names is the set of street names that differ from
        what normalize_name() produces for them.
    '''
    unexpected_types = defaultdict(set)
    badly_formed = set()

    for _event, element in logging_itr(ET.iterparse(osmfile)):
        if element.tag in ("node", "way"):
            name, kind = get_street_name_and_type(element)

            # Record street types outside the expected collection.
            if kind is not None:
                if kind not in expected:
                    unexpected_types[kind].add(name)

            # Record names that change when run through the normalizer.
            if name is not None:
                if name != normalize_name(name):
                    badly_formed.add(name)

        # 'tag' elements are left intact (presumably so the enclosing
        # node/way can still read them); everything else is emptied once
        # it has been handled.
        if element.tag == 'tag':
            continue
        element.clear()

    return unexpected_types, badly_formed
def audit(osmfile):
    '''
    Walks the given file and inspects the street name of each node and way.

    Returns the pair (street_types, unnormalized_street_names). The first
    item is a dictionary from each street type missing from `expected` to the
    set of street names carrying that type; the second is the set of street
    names that are not equal to their normalize_name() result.
    '''
    odd_types = defaultdict(set)
    non_normalized = set()
    parsed = ET.iterparse(osmfile)

    for _, current in logging_itr(parsed):
        carries_street = current.tag == "node" or current.tag == "way"
        if carries_street:
            street, suffix = get_street_name_and_type(current)

            # A street type is only reported when one was found and it is
            # not among the expected ones.
            if not (suffix is None or suffix in expected):
                odd_types[suffix].add(street)

            # A street name is reported when normalising it changes it.
            if street is not None and normalize_name(street) != street:
                non_normalized.add(street)

        # Everything except 'tag' elements is emptied after being visited.
        if current.tag != 'tag':
            current.clear()

    return odd_types, non_normalized
def count_tags(filename):
    '''
    Tallies the element tags in the given file. Returns a plain dictionary
    whose keys are the tag names and whose values are how many elements with
    that tag were yielded by the parser.
    '''
    tally = {}
    for _event, node in logging_itr(ET.iterparse(filename)):
        tag_name = node.tag
        tally[tag_name] = tally.get(tag_name, 0) + 1
        # The element is emptied as soon as it has been counted.
        node.clear()
    return tally
Example #4
0
def count_tags(filename):
    '''
    Counts how often each tag name occurs among the elements of the given
    file and returns the result as a regular dictionary (tag -> occurrences).
    '''
    occurrences = defaultdict(int)
    parsed = ET.iterparse(filename)
    for _, current in logging_itr(parsed):
        occurrences[current.tag] = occurrences[current.tag] + 1
        # Empty the element once it has been counted.
        current.clear()
    # Hand back a plain dict rather than the defaultdict.
    plain = dict(occurrences)
    return plain
def process_map(filename):
    '''
    Collects the users found in the given file. Each element is passed to
    get_user(); every non-None result is gathered into the returned set.
    '''
    contributors = set()
    for _event, node in logging_itr(ET.iterparse(filename)):
        uid = get_user(node)
        # The element is no longer needed once its user has been extracted.
        node.clear()
        if uid is None:
            continue
        contributors.add(uid)

    return contributors
Example #6
0
def process_map(filename):
    '''
    Counts the number of keys belonging to each type (as determined by the
    function key_type) in the given input file.

    Returns a Counter mapping each non-None value returned by key_type to the
    number of elements that produced it.
    '''
    keys = Counter()
    for _, element in logging_itr(ET.iterparse(filename)):
        ktype = key_type(element)
        if ktype is not None:
            keys[ktype] += 1

        # iterparse keeps building the full tree unless processed elements
        # are emptied, so release them here to keep memory bounded on large
        # files. 'tag' and 'nd' children are left intact until their parent
        # is cleared, in case key_type reads them from the enclosing element
        # (assumption: key_type does not need any other descendants).
        if element.tag not in ('tag', 'nd'):
            element.clear()

    return keys
Example #7
0
def process_map(filename):
    '''
    Builds the set of users seen in the given file: get_user() is applied to
    every element and each result other than None is kept.
    '''
    seen_users = set()
    parsed = ET.iterparse(filename)
    for _, current in logging_itr(parsed):
        who = get_user(current)
        if not (who is None):
            seen_users.add(who)
        # Empty the element now that it has been inspected.
        current.clear()

    return seen_users
def process_map(file_in):
    '''
    Processes the given XML file as described in the file docstring. Writes
    output to a file named after the base name of the input with ".json"
    appended (the original extension is kept, not replaced).

    Each element is passed to shape_element(); every truthy result is written
    as one JSON document per line.
    '''
    # NOTE(review): basename presumably is os.path.basename, in which case the
    # output path is relative to the current working directory - confirm.
    file_out = "{0}.json".format(basename(file_in))
    with codecs.open(file_out, "w") as fo:
        for _, element in logging_itr(ET.iterparse(file_in)):
            el = shape_element(element)
            # Falsy results from shape_element are skipped entirely.
            if el:
                fo.write(json.dumps(el) + "\n")
            # 'tag' and 'nd' elements are left intact (presumably because the
            # enclosing element still needs them when it is shaped); all
            # other elements are emptied once handled.
            if element.tag not in ('tag', 'nd'):
                element.clear()