Esempio n. 1
0
def load_new_tag(element, secondary, default_tag_type):
    """Build the tag dict that accompanies the ways_tags / nodes_tags rows.

    Args:
        element: Parent element; its ``id`` attribute is copied to the result.
        secondary: Child tag element carrying ``k`` and ``v`` attributes.
        default_tag_type: Stored under ``'type'`` when the key has no colon.

    Returns:
        A dict with ``id``, ``key``, ``type`` and ``value`` entries.
    """
    new = {}
    new['id'] = element.attrib['id']
    raw_key = secondary.attrib['k']
    if ":" not in raw_key:
        new['key'] = raw_key
        new['type'] = default_tag_type
    else:
        # Split on the first colon only, so "a:b:c" yields type "a" and
        # key "b:c".
        tag_type, tag_key = raw_key.split(":", 1)
        new['key'] = tag_key
        new['type'] = tag_type

    # Cleaning and loading values of various keys
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])

    # Postal code clean-up.  The previous check sliced the key at offset 6,
    # which can never equal 'postcode' for "addr:postcode" ("addr:" is five
    # characters), and then read an undefined name.  The cleaned value is
    # now stored directly.
    elif raw_key == 'addr:postcode':
        new['value'] = clean_postcode(secondary.attrib['v'])

    else:
        new['value'] = secondary.attrib['v']

    return new
def tag_attributes(element, default_tag_type, child):
    """Build the tag dict for one child tag, dropping invalid postal codes.

    Args:
        element: Parent element; its ``id`` attribute is copied to the result.
        default_tag_type: Stored under ``'type'`` when the key has no colon.
        child: Tag element carrying ``k`` and ``v`` attributes.

    Returns:
        A dict with ``id``, ``key``, ``type`` and ``value`` entries, or
        ``None`` when the tag is a postcode that does not match ``POST_CODE``
        or is not exactly six characters long.
    """
    tg = {}
    tg['id'] = element.attrib['id']
    raw_key = child.attrib['k']
    if ':' not in raw_key:
        tg['key'] = raw_key
        tg['type'] = default_tag_type
    else:
        # Only the first colon separates type from key.
        tag_type, tag_key = raw_key.split(':', 1)
        tg['key'] = tag_key
        tg['type'] = tag_type

    if is_street_name(child):
        tg['value'] = update_name(child.attrib['v'])

    elif tg['key'] == 'postcode':
        # Elements expose their attributes as ``attrib``; the former
        # ``child.attribute`` lookup raised AttributeError.
        pin_code = child.attrib['v']
        # Reject every invalid code.  Previously a value that failed the
        # regex fell through and produced a dict without a 'value' entry.
        if POST_CODE.match(pin_code) is None or len(pin_code) != 6:
            return None
        tg['value'] = pin_code

    else:
        tg['value'] = child.attrib['v']
    return tg
Esempio n. 3
0
def add_tag(elem, elem_2, default_tag_type):
    """Build the tag dict for one secondary tag of ``elem``.

    Args:
        elem: Parent element; its ``id`` attribute is copied to the result.
        elem_2: Child tag element carrying ``k`` and ``v`` attributes.
        default_tag_type: Stored under ``'type'`` when the key has no colon.

    Returns:
        A dict with ``id``, ``key``, ``type`` and ``value`` entries.  Street
        names, postal codes and phone numbers are passed through their
        respective ``update_*`` helpers first.
    """
    new_tag = {}
    new_tag['id'] = elem.attrib['id']

    raw_key = elem_2.attrib['k']
    if ":" in raw_key:
        # Only the first colon separates type from key.
        sec_idx = raw_key.index(":")
        new_tag['key'] = raw_key[sec_idx + 1:]
        new_tag['type'] = raw_key[:sec_idx]
    else:
        new_tag['key'] = raw_key
        new_tag['type'] = default_tag_type

    if is_street_name(elem_2):
        # Clean the tag *value*; the key (attrib['k']) was passed here
        # before, so the street name itself was never cleaned.
        new_tag['value'] = update_name(elem_2.attrib['v'], mapping)
    elif is_postcode(elem_2):
        new_tag['value'] = update_postcode(elem_2.attrib['v'])
    elif is_phone(elem_2):
        new_tag['value'] = update_phone(elem_2.attrib['v'])
    else:
        new_tag['value'] = elem_2.attrib['v']

    return new_tag
Esempio n. 4
0
def load_new_tag(element, secondary, default_tag_type):
    """Create one row dict for the way_tags / node_tags lists.

    The key is split at its first colon into ``type`` (before) and ``key``
    (after); keys without a colon keep ``default_tag_type``.  Street names go
    through ``update_name`` and the province value 'Texas' is stored as 'TX'.
    """
    row = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        row['key'] = remainder
        row['type'] = prefix
    else:
        row['key'] = full_key
        row['type'] = default_tag_type

    if is_street_name(secondary):
        row['value'] = update_name(secondary.attrib['v'], mapping)
    elif row['key'] == 'province':
        raw = secondary.attrib['v']
        # Abbreviate the one spelled-out state name; other values pass through.
        row['value'] = 'TX' if raw == 'Texas' else raw
    else:
        row['value'] = secondary.attrib['v']

    return row
Esempio n. 5
0
def load_new_tag(element, secondary, default_tag_type):
    """Create one row dict for the way_tags / node_tags lists.

    Returns the dict with ``id``, ``key``, ``type`` and ``value``, or ``None``
    when the tag is a phone number that ``phone_cleaned`` rejects (returns
    ``None`` for).
    """
    row = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    if ":" in full_key:
        colon_at = full_key.index(":")
        row['key'] = full_key[colon_at + 1:]
        row['type'] = full_key[:colon_at]
    else:
        row['key'] = full_key
        row['type'] = default_tag_type

    # Street names take precedence over every other cleaning rule.
    if is_street_name(secondary):
        row['value'] = update_name(secondary.attrib['v'])
        return row

    if row['key'] != 'phone':
        row['value'] = secondary.attrib['v']
        return row

    cleaned = phone_cleaned(secondary.attrib['v'])
    if cleaned is None:
        return None
    row['value'] = cleaned
    return row
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    Returns ``None`` for any other element.  The result holds ``id``,
    ``type``, optional ``visible``, a ``created`` sub-dict with every field
    named in ``CREATED``, an optional ``pos`` pair, an ``address`` sub-dict
    built from ``addr:`` tags, other lower-case tags as top-level keys and,
    when ``nd`` children exist, their refs under ``node_refs``.
    """
    if element.tag not in ("node", "way"):
        return None

    attrs = element.attrib
    shaped = {'id': attrs['id'], 'type': element.tag}

    if 'visible' in attrs:
        shaped['visible'] = attrs['visible']

    shaped['created'] = {field: attrs[field] for field in CREATED}

    if 'lat' in attrs:
        shaped['pos'] = [float(attrs['lat']), float(attrs['lon'])]

    if element.find("tag") is not None:
        for tag in element.iter("tag"):
            key = tag.attrib['k']
            in_addr_namespace = key.startswith("addr:")

            if lower_colon.match(key) and in_addr_namespace:
                address = shaped.setdefault('address', {})
                address[key.split(":")[1]] = tag.attrib['v']

                if audit.is_street_name(tag):
                    street, st_type = audit.update_name(
                        tag.attrib['v'], audit.mapping)
                    # The cleaned street name replaces the raw one.
                    address['street'] = street
                    # Record the street type only when it is an expected one.
                    if st_type in audit.expected:
                        address['st_type'] = st_type
            elif lower.match(key) and not in_addr_namespace:
                shaped[key] = tag.attrib['v']

    if element.find("nd") is not None:
        shaped["node_refs"] = [nd.attrib['ref'] for nd in element.iter("nd")]

    return shaped
Esempio n. 7
0
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    Attributes whose value does not match ``problemchars`` are copied:
    ``CREATED`` fields into ``created``, ``lat``/``lon`` into ``pos`` and the
    rest as top-level keys.  ``addr:`` tags with a single colon are stored in
    ``address``; other tags become top-level keys and ``nd`` refs are
    collected in ``node_refs``.

    Returns:
        The shaped dict, or ``None`` for elements other than node/way.
    """
    node = {}
    if element.tag == "node" or element.tag == "way":
        # Placeholders; "Lat"/"Lon" are overwritten when the attributes exist.
        node["pos"] = ["Lat", "Lon"]
        node["created"] = {}
        node["node_refs"] = []
        node["address"] = {}

        node["osm_type"] = element.tag

        # Iterate through element attributes to check for validity
        for key, item in element.items():
            if not problemchars.match(item):
                if key in CREATED:
                    node["created"][key] = item

                elif key in ["lon", "lat"]:
                    # The boolean indexes the pair: lat -> 0, lon -> 1.
                    node["pos"][key == "lon"] = float(item)

                else:
                    node[key] = item

        # Iterate through children
        for child in element:
            # Make assumption that only tags or nd exists
            if child.tag == "tag":
                key = child.get("k")
                item = child.get("v")
                if "addr:" in key:
                    if key.count(":") < 2:
                        sub_key = key.split(":")[1]

                        # If street address change the suffix to la_mapping
                        if sub_key == "street":
                            item = update_name(item, la_mapping)

                        try:
                            node["address"][sub_key] = item
                        except TypeError:
                            # node["address"] is no longer a dict, e.g. a
                            # plain "address" tag replaced it earlier.
                            print("error")

                        # Was `if sub_key = "postcode":` followed by a
                        # malformed `pdb.set_trace():` (both syntax errors).
                        # NOTE(review): zip_update receives `node`, so it
                        # presumably updates it in place; its return value
                        # was never stored by the original either - confirm.
                        if sub_key == "postcode":
                            item = zip_update(item, node, zip_mapping)

                    # If more than one colon in address: skip only this tag.
                    # `break` here used to discard every remaining child,
                    # including later nd references.
                    else:
                        continue
                else:
                    node[key] = item

            elif child.tag == "nd":
                item = child.get("ref")
                node["node_refs"].append(item)

        # The function previously ended without returning the dict it built.
        return node

    return None
Esempio n. 8
0
def load_new_tag(element, secondary, default_tag_type):
    """Create one row dict for the way_tags / node_tags lists.

    Cleaning rules, in priority order: street names (``update_name``), phone
    numbers (``update_phone_num``), province ('Ontario' becomes 'ON') and
    postal codes (``POSTCODE`` pattern).  Returns ``None`` when a phone number
    cannot be cleaned or a postal code is malformed.
    """
    tag = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        tag['key'] = remainder
        tag['type'] = prefix
    else:
        tag['key'] = full_key
        tag['type'] = default_tag_type

    if is_street_name(secondary):
        tag['value'] = update_name(secondary.attrib['v'])

    elif tag['key'] == 'phone':
        cleaned = update_phone_num(secondary.attrib['v'])
        if cleaned is None:
            return None
        tag['value'] = cleaned

    elif tag['key'] == 'province':
        raw = secondary.attrib['v']
        tag['value'] = 'ON' if raw == 'Ontario' else raw

    elif tag['key'] == 'postcode':
        code = secondary.attrib['v'].strip()
        if POSTCODE.match(code) is not None:
            # Normalise to "XXX XXX" in upper case.
            spaced = code if " " in code else code[:3] + " " + code[3:]
            tag['value'] = spaced.upper()
        elif code.startswith("14174"):
            # Zip code surfaced by the postal-code audit; kept as-is so the
            # document can be located for deletion later.
            tag['value'] = code
        else:
            return None

    else:
        tag['value'] = secondary.attrib['v']

    return tag
Esempio n. 9
0
def load_new_tag(element, secondary, default_tag_type):
    """Create one row dict for the way_tags / node_tags lists.

    Returns a dict with ``id``, ``key``, ``type`` and ``value``, or ``None``
    when the tag is a phone number ``update_phone_num`` cannot clean or a
    postal code that neither matches ``POSTCODE`` nor starts with "14174".
    """
    entry = {'id': element.attrib['id']}

    k = secondary.attrib['k']
    if ":" in k:
        split_at = k.index(":")
        entry['key'] = k[split_at + 1:]
        entry['type'] = k[:split_at]
    else:
        entry['key'] = k
        entry['type'] = default_tag_type

    # Street names win over every key-specific rule below.
    if is_street_name(secondary):
        entry['value'] = update_name(secondary.attrib['v'])
        return entry

    field = entry['key']

    if field == 'phone':
        number = update_phone_num(secondary.attrib['v'])
        if number is None:
            return None
        entry['value'] = number
        return entry

    if field == 'province':
        name = secondary.attrib['v']
        if name == 'Ontario':
            name = 'ON'
        entry['value'] = name
        return entry

    if field == 'postcode':
        code = secondary.attrib['v'].strip()
        if POSTCODE.match(code) is None:
            # Only the audited "14174" zip code survives as-is; any other
            # malformed code drops the tag.
            if code[:5] != "14174":
                return None
            entry['value'] = code
            return entry
        if " " not in code:
            code = code[:3] + " " + code[3:]
        entry['value'] = code.upper()
        return entry

    entry['value'] = secondary.attrib['v']
    return entry
Esempio n. 10
0
def insert_address(node, address_name_tokens, tag):
    """Store the tag value under ``node["address"]`` for each key token.

    The first entry of ``address_name_tokens`` is skipped; every later token
    becomes a key in the address sub-dict.  The value comes from the tag's
    ``v`` attribute, passed through ``audit.update_name`` when the token is
    "street".  ``node`` is modified in place and returned.
    """
    node.setdefault("address", {})
    for token in address_name_tokens[1:]:
        raw = tag.attrib["v"]
        cleaned = audit.update_name(raw, mapping) if token == "street" else raw
        node["address"][token] = cleaned
    return node
Esempio n. 11
0
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    Attributes named in ``CREATED`` go into ``created``, ``lat``/``lon`` into
    ``pos`` and every other attribute becomes a top-level key.  Tags whose key
    matches ``problemchars`` or holds more than one colon are skipped;
    ``addr:`` tags are collected under ``address`` and the rest become
    top-level keys.  ``nd`` refs are collected under ``node_refs``.

    Returns:
        The shaped dict, or ``None`` for elements other than node/way.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    lat_lon = [0, 0]
    has_pos = False
    created = {}
    # .items() instead of the Python-2-only .iteritems().
    for key, value in element.attrib.items():
        if key in CREATED:
            created[key] = value
        elif key in ('lat', 'lon'):
            has_pos = True
            lat_lon[0 if key == 'lat' else 1] = float(value)
        else:
            node[key] = value

    address = {}
    for tag in element.iter('tag'):
        tag_key = tag.get('k')
        if (problemchars.search(tag_key) is not None
                or len(tag_key.split(":")) > 2):
            continue
        if 'addr:' in tag_key:
            field = tag_key.split(":")[1]
            if field == 'street':
                # Streets listed in improper_address are reported and skipped.
                if tag.get('v') in improper_address:
                    # Function-call form prints the same on Python 2 and 3.
                    print(tag.get('v'))
                    continue
                address[field] = update_name(tag.get('v'))
            else:
                address[field] = tag.get('v')
        else:
            node[tag_key] = tag.get('v')

    node_refs = [nd.get('ref') for nd in element.iter('nd')]
    if node_refs:
        node['node_refs'] = node_refs
    node['created'] = created
    if has_pos:
        node['pos'] = lat_lon
    # Attach the address only when something was stored; a skipped street
    # used to leave an empty dict behind.
    if address:
        node['address'] = address
    return node
Esempio n. 12
0
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    The result carries ``id``, ``type``, ``visible`` and a ``created``
    sub-dict read from the element attributes, an ``address`` sub-dict for
    house number / postcode / street tags, the ``amenity``, ``cuisine``,
    ``name`` and ``phone`` tags, ``node_refs`` for ``nd`` children and ``pos``
    when both coordinates are present.

    Returns:
        The shaped dict, or ``None`` for elements other than node/way.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {
        "id": element.attrib['id'],
        "type": element.tag,
        "visible": element.get("visible"),
        "created": {
            "version": element.get("version"),
            "changeset": element.get("changeset"),
            "timestamp": element.get("timestamp"),
            "user": element.get("user"),
            "uid": structure_single_user_id(element.get("uid"))
        }
    }

    # Tag key -> field name inside node["address"].
    address_fields = {
        "addr:housenumber": "housenumber",
        "addr:postcode": "postcode",
        "addr:street": "street",
    }
    plain_fields = ("amenity", "cuisine", "name", "phone")

    if element.find("tag") is not None:
        for tag in element.iter("tag"):
            key = tag.attrib['k']

            # Create the address dict once.  It used to be reset to {} on
            # every address tag, which discarded fields stored by earlier
            # tags of the same element.
            # NOTE(review): assumes ADDRESS is a collection of tag keys.
            if key in ADDRESS:
                node.setdefault("address", {})

            if key in address_fields:
                value = tag.attrib['v']
                if key == "addr:street":
                    value = update_name(value, mapping)
                # setdefault also covers keys missing from ADDRESS, which
                # previously raised KeyError.
                node.setdefault("address", {})[address_fields[key]] = value
            elif key in plain_fields:
                node[key] = tag.attrib['v']

    for nd_elem in element.iter("nd"):
        node.setdefault('node_refs', []).append(nd_elem.get("ref"))

    if element.get("lat") and element.get("lon"):
        node["pos"] = [
            float(element.get("lat")),
            float(element.get("lon"))
        ]

    return node
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    Copies ``id``/``visible`` when present, the ``CREATED`` attributes into
    ``created`` and ``lat``/``lon`` into ``pos``.  Tags whose key does not
    match ``problemchars`` are stored: house number, postcode (via
    ``audit.update_postcode``) and street (via ``audit.update_name``) under
    ``address``; any key not containing "addr" as a top-level entry.  For
    ways, ``nd`` refs go under ``node_refs``.  Returns ``None`` for other
    elements.
    """
    shaped = {"created": {}, "address": {}, "pos": []}
    refs = []

    if element.tag not in ("node", "way"):
        return None

    attrs = element.attrib
    if "id" in attrs:
        shaped["id"] = attrs["id"]

    shaped["type"] = element.tag

    if "visible" in attrs:
        shaped["visible"] = attrs["visible"]

    for field in CREATED:
        if field in attrs:
            shaped["created"][field] = attrs[field]

    # Latitude first, then longitude; each is appended only if present.
    for coord in ("lat", "lon"):
        if coord in attrs:
            shaped["pos"].append(float(attrs[coord]))

    for tag in element.iter("tag"):
        key = tag.attrib['k']
        if problemchars.search(key):
            continue
        if key == "addr:housenumber":
            shaped["address"]["housenumber"] = tag.attrib['v']
        elif key == "addr:postcode":
            shaped["address"]["postcode"] = audit.update_postcode(tag.attrib['v'])
        elif key == "addr:street":
            shaped["address"]["street"] = audit.update_name(tag.attrib['v'])
        elif "addr" not in key:
            shaped[key] = tag.attrib['v']

    # Drop the address placeholder when no address tag was stored.
    if shaped["address"] == {}:
        shaped.pop("address", None)

    # Node references only exist on ways.
    if element.tag == "way":
        refs.extend(nd.attrib["ref"] for nd in element.iter("nd"))
        if refs:
            shaped["node_refs"] = refs

    return shaped
def shape_element(element):
    """Turn a ``node`` or ``way`` element into a nested dict.

    Any other element yields ``None``.  ``created`` receives every attribute
    named in ``CREATED``; ``pos`` is set when ``lat`` is present; ``addr:``
    tags matching ``lower_colon`` fill ``address`` (streets are cleaned with
    ``audit.update_name``); other tags matching ``lower`` become top-level
    keys; ``nd`` refs are listed under ``node_refs``.
    """
    is_supported = element.tag == "node" or element.tag == "way"
    if not is_supported:
        return None

    result = {}
    result['id'] = element.attrib['id']
    result['type'] = element.tag

    if 'visible' in element.attrib:
        result['visible'] = element.attrib['visible']

    result['created'] = dict((name, element.attrib[name]) for name in CREATED)

    if 'lat' in element.attrib:
        latitude = float(element.attrib['lat'])
        longitude = float(element.attrib['lon'])
        result['pos'] = [latitude, longitude]

    has_tags = element.find("tag") is not None
    if has_tags:
        for tag in element.iter("tag"):
            tag_key = tag.attrib['k']

            if lower_colon.match(tag_key) and tag_key.startswith("addr:"):
                result.setdefault('address', {})
                sub_key = tag_key.split(":")[1]
                result['address'][sub_key] = tag.attrib['v']

                if not audit.is_street_name(tag):
                    continue

                cleaned, street_type = audit.update_name(
                    tag.attrib['v'], audit.mapping)
                # Overwrite the raw street name with the cleaned one.
                result['address']['street'] = cleaned
                # Keep the street type for analysis only if it is expected.
                if street_type in audit.expected:
                    result['address']['st_type'] = street_type
            elif lower.match(tag_key) and not tag_key.startswith("addr:"):
                result[tag_key] = tag.attrib['v']

    has_refs = element.find("nd") is not None
    if has_refs:
        result["node_refs"] = [ref.attrib['ref'] for ref in element.iter("nd")]

    return result
Esempio n. 15
0
def load_new_tag(element, secondary, default_tag_type):
    """Create one row dict for the way_tags / node_tags lists.

    Street names, phone numbers, cities and postal codes are passed through
    ``update_name``, ``update_phone_num``, ``update_city`` and
    ``update_postcode`` respectively.  Returns ``None`` when
    ``update_phone_num`` yields ``None``.
    """
    row = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        row['key'] = remainder
        row['type'] = prefix
    else:
        row['key'] = full_key
        row['type'] = default_tag_type

    # Street names are detected on the element itself and take precedence.
    if is_street_name(secondary):
        row['value'] = update_name(secondary.attrib['v'])
        return row

    field = row['key']
    if field == 'phone':
        number = update_phone_num(secondary.attrib['v'])
        if number is None:
            return None
        row['value'] = number
    elif field == 'city':
        row['value'] = update_city(secondary.attrib['v'])
    elif field == 'postcode':
        row['value'] = update_postcode(secondary.attrib['v'])
    else:
        row['value'] = secondary.attrib['v']

    return row
Esempio n. 16
0
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    The result holds ``id``, ``type``, optional ``visible`` and ``pos``, a
    ``created`` sub-dict, ``node_refs`` for direct ``nd`` children, an
    ``address`` sub-dict for single-colon address tags and every other tag
    whose key does not match ``problemchars`` as a top-level key.

    Returns:
        The shaped dict, or ``None`` for elements other than node/way.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {"id": element.get("id"), "type": element.tag}

    visible = element.get('visible')
    if visible is not None:
        node['visible'] = visible

    lat, lon = element.get("lat"), element.get("lon")
    if lat and lon:
        node["pos"] = [float(lat), float(lon)]

    node["created"] = {
        "version": element.get("version"),
        "changeset": element.get("changeset"),
        "timestamp": element.get("timestamp"),
        "user": element.get("user"),
        "uid": element.get("uid"),
    }

    # Built once; the list used to be re-assigned on every loop iteration.
    node_refs = [nd.get("ref") for nd in element.findall('nd')]
    if node_refs:
        node["node_refs"] = node_refs

    address = {}
    for t in element.findall('tag'):
        k = t.get("k")
        if problemchars.search(k):
            continue
        if is_address.search(k) and not second_colon.search(k):
            if is_street.search(k):
                address["street"] = audit.update_name(t.get("v"))
            else:
                address[k.split(":")[-1]] = t.get("v")
        else:
            node[k] = t.get("v")

    if address:
        node["address"] = address

    # NOTE(review): debug output kept for parity with the previous behaviour;
    # the call form runs on both Python 2 and 3.  Consider logging instead.
    print(node)
    return node
Esempio n. 17
0
def update_dict(elem):
    """Return the cleaned value for a postcode/street tag, or ``False``.

    ``addr:postcode`` values go through ``update_zip`` and ``addr:street``
    values through ``update_name``.  ``False`` is returned for any other key
    and also when cleaning leaves the value unchanged.
    """
    key = elem.attrib['k']
    if key == 'addr:postcode':
        cleaned = update_zip(elem.attrib['v'])
    elif key == 'addr:street':
        cleaned = update_name(elem.attrib['v'], mapping)
    else:
        return False

    # Only report a value when cleaning actually changed something.
    return cleaned if cleaned != elem.attrib['v'] else False
Esempio n. 18
0
def shape_element(element):
    """Shape a ``node`` or ``way`` element into a nested dict.

    Returns ``None`` for other elements.  ``visible`` is stored lower-cased,
    ``created`` holds the ``CREATED`` attributes that are present, ``pos`` is
    set when both coordinates exist, single-level ``addr:`` tags fill
    ``address`` (street and city are cleaned through ``audit``), remaining
    tags become top-level keys and ``nd`` refs go under ``node_refs``.
    """
    if element.tag not in ("node", "way"):
        return None

    attrs = element.attrib
    shaped = {'id': attrs['id'], 'type': element.tag}

    if 'visible' in attrs:
        shaped['visible'] = str(attrs["visible"]).lower()

    created = {
        name: attrs[name]
        for name in CREATED
        if name in attrs and attrs[name] is not None
    }

    if 'lat' in attrs and 'lon' in attrs:
        shaped['pos'] = [float(attrs['lat']), float(attrs['lon'])]
    shaped['created'] = created

    address = {}
    for tag in element.iter("tag"):
        key = tag.attrib['k']
        if "addr:" not in key:
            shaped[key] = tag.attrib['v']
            continue

        field = key.replace('addr:', '')
        # Address keys that still contain a colon are ignored.
        if ":" in field:
            continue

        if field == "street":
            # Fix unexpected street names.
            address[field] = audit.update_name(tag.attrib['v'], audit.mapping)
        elif field == "city":
            address[field] = audit.update_name_city(
                tag.attrib['v'], audit.mapping_city)
        else:
            address[field] = tag.attrib['v']

    if address:
        shaped['address'] = address

    refs = [nd.attrib['ref'] for nd in element.iter("nd")]
    if refs:
        shaped['node_refs'] = refs

    return shaped
def process_children(node, element):
    """Fill ``node`` in place from the children of ``element``.

    Tag children whose key is listed in ``ADDRESS`` are collected under
    ``node['address']`` (the key minus its first five characters); other tag
    children whose key does not start with ``ADDRESS_PREFIX`` become top-level
    keys, with dots removed via ``remove_dots``.  Attribute values of ``nd``
    children are collected under ``node['node_refs']``.

    Args:
        node: Dict being built for the element; modified in place.
        element: Element whose direct children are inspected.

    Returns:
        None.
    """
    address = {}
    node_refs = []

    for child in element:
        if child.tag == 'tag':
            key = child.attrib.get('k')
            if key is None:
                # A tag without attributes contributed nothing before either.
                continue

            if key in ADDRESS:
                # Update the street name first for consistency.
                # NOTE(review): the old guard also required
                # `is_street_name in mapping`, i.e. the function object as a
                # dict key, so update_name never ran.  Confirm update_name
                # copes with street types missing from `mapping`.
                if is_street_name(child):
                    address[key[5:]] = update_name(child.attrib['v'], mapping)
                else:
                    address[key[5:]] = child.attrib['v']
            elif not key.startswith(ADDRESS_PREFIX):
                if '.' in key:
                    key = remove_dots(key)
                node[key] = child.attrib['v']

        elif child.tag == 'nd':
            node_refs.extend(child.attrib.values())

    # Insert the collected results once, after all children were seen.
    if node_refs:
        node['node_refs'] = node_refs
    if address:
        node['address'] = address
Esempio n. 20
0
def _shape_tag(child, element_id, problem_chars, default_tag_type):
    """Return the tag dict for one tag child, or None if it must be skipped."""
    raw_key = child.attrib['k']
    value = child.attrib['v']

    if LOWER_COLON.match(raw_key):
        tag_type, tag_key = raw_key.split(':', 1)
        if raw_key == 'addr:street':
            value = update_name(value, mapping)
    elif problem_chars.search(raw_key):
        # search() finds a problematic character anywhere in the key;
        # match() only looked at the first character.
        return None
    else:
        tag_type, tag_key = default_tag_type, raw_key

    return {
        'type': tag_type,
        'key': tag_key,
        'id': element_id,
        'value': value,
    }


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names copied for a node.
        way_attr_fields: Attribute names copied for a way.
        problem_chars: Compiled pattern; tags whose key contains a match are
            dropped (unless the key already matched ``LOWER_COLON``).
        default_tag_type: ``type`` stored for keys without a colon prefix.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way and
        ``None`` for any other element.
    """
    tags = []

    if element.tag == 'node':
        # Honour the caller-supplied field list instead of the global.
        node_attribs = {
            name: value
            for name, value in element.attrib.items()
            if name in node_attr_fields
        }

        for child in element:
            # Children without a `k` attribute used to raise KeyError.
            if child.tag != 'tag':
                continue
            shaped = _shape_tag(child, element.attrib['id'],
                                problem_chars, default_tag_type)
            if shaped is not None:
                tags.append(shaped)

        return {'node': node_attribs, 'node_tags': tags}

    if element.tag == 'way':
        way_attribs = {
            name: value
            for name, value in element.attrib.items()
            if name in way_attr_fields
        }
        way_nodes = []

        for child in element:
            if child.tag == 'tag':
                # Way tags now share the node-tag shaping, so addr:street
                # values are cleaned here as well.
                shaped = _shape_tag(child, element.attrib['id'],
                                    problem_chars, default_tag_type)
                if shaped is not None:
                    tags.append(shaped)
            elif child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    # Zero-based order of the node reference within the way.
                    'position': len(way_nodes),
                })

        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Esempio n. 21
0
def colon_clean(key, val, node):
    """Shape and clean one tag whose key contains a ':' and store it in node.

    The key is dispatched on its prefix:

    * ``name:``  - stored in ``node['other_names']`` under the key suffix.
    * ``addr:``  - stored in ``node['address']`` under the key suffix, but
      only when ``lower_colon`` finds no match in the suffix; city and
      street values are passed through ``update_city`` / ``update_name``.
    * ``tiger:`` / ``gnis:`` - stored in ``node['tiger']`` / ``node['gnis']``
      (the ``tiger:mtfcc`` key is dropped).
    * keys matching ``skip_colon`` - reported on stdout and dropped.
    * anything else - split on the first colon into an outer and a nested
      key and stored as ``node[outer][nested]``.

    Args:
        key: tag key; the final branch assumes it contains at least one ':'.
        val: tag value.
        node: dict being built for the current element; mutated in place.

    Returns:
        The same ``node`` dict that was passed in.
    """
    if len(key) > 5 and key[:5] == 'name:':
        # Collect alternate names (or names in different languages)
        key = key[5:]
        if 'other_names' not in node.keys():
            node['other_names'] = {key:val}
        else:
            node['other_names'].update( {key:val} )
    elif len(key) > 5 and key[:5] == 'addr:':
        # Every address value goes through normalize_capitalization first
        # (presumably title-casing each word - confirm against the helper).
        val = normalize_capitalization(val)
        key = key[5:]
        # NOTE(review): presumably lower_colon matches a remaining ':' in the
        # suffix, so only single-colon addr keys are stored - confirm.
        if lower_colon.search(key) is None:
            if key == "city":
                fixed_city = update_city(val, city_mapping)
                if fixed_city != val: 
                    print val, "=>", fixed_city
                    val = fixed_city
            # For any non-city addr key, make sure the address already
            # carries a city, falling back to default_city.
            elif "address" not in node.keys():
                node['address'] = {'city':default_city}
            elif "city" not in node['address'].keys():
                node['address'].update({'city':default_city})
            if key == "street":
                fixed_name = update_name(val, mapping)
                if fixed_name != val: 
                    print val, "=>", fixed_name
                    val = fixed_name
            if 'address' not in node.keys():
                node['address'] = {key:val}
            else:
                node['address'].update({key:val})
        # addr keys whose suffix still matches lower_colon are ignored
    elif len(key) > 6 and key[:6] == 'tiger:':
        key = key[6:]
        # Skip the 'tiger:mtfcc' key entirely
        if key == 'mtfcc':
            return node
        if 'tiger' not in node.keys():
            node['tiger'] = {key:val}
        else:
            node['tiger'].update({key:val})
    elif len(key) > 5 and key[:5] == 'gnis:':
        key = key[5:]
        if 'gnis' not in node.keys():
            node['gnis'] = {key:val}
        else:
            node['gnis'].update({key:val})
    elif skip_colon.search(key) is not None:
        # These keys have garbage values, don't store them
        # Examples of keys that are skipped:
        #  'redwood_city_ca:addr_id', 'rwc_ca:buildingid', 'paloalto_ca:id'
        #  'gosm:sig:8CBDE645', 'massgis:cat'
        print 'Skip colon match: %s=%s'%(key,val)
        return node
    else:
        # Only the part before the first colon becomes the outer dict key
        dict_key,nested_key = key.split(":",1)
        if nested_key in RESERVED_KEYS:
            # Key 'note:address' contains a street address, save it
            if dict_key == 'note' and nested_key == 'address':
                val = update_name(val, mapping)
                if 'address' not in node.keys():
                    node['address'] = {'street':val}
                elif 'street' not in node['address'].keys():
                    node['address'].update({'street':val})
                else:
                    # A street is already recorded; keep this one separately
                    node['address'].update({'street_address':val})
            # Every other reserved nested key (e.g. 'source:name') is dropped
            return node
        if dict_key not in node.keys():
            node[dict_key] = {nested_key:val}
        else:
            if isinstance(node[dict_key],dict):
                node[dict_key].update({nested_key:val})
            else:
                # The outer key already holds a plain value: convert it to a
                # dict, keeping the original value under '<dict_key>_key'
                node[dict_key] = {dict_key+'_key':node[dict_key], nested_key:val}
    return node
def _split_secondary_key(raw_key, problem_chars, default_tag_type):
    """Return ``(type, key)`` for a tag key, or None if the tag is dropped."""
    if LOWER_COLON.match(raw_key):
        tag_type, key = raw_key.split(':', 1)
        return tag_type, key
    # NOTE(review): ``match`` only tests the start of the key; if the pattern
    # is an unanchored character class, ``search`` may be what is intended.
    if problem_chars.match(raw_key):
        return None
    return default_tag_type, raw_key


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Each secondary tag becomes exactly one row.  For ways, ``addr:street``
    values are cleaned with ``update_name`` and ``addr:postcode`` values with
    ``update_postal_code``; node tag values are stored unmodified.

    Args:
        element: XML element; only ``node`` and ``way`` elements are shaped.
        node_attr_fields: attribute names copied from a node element.
        way_attr_fields: attribute names copied from a way element.
        problem_chars: compiled pattern; a tag whose key matches is dropped.
        default_tag_type: ``type`` given to tags whose key has no prefix.

    Returns:
        ``{'node': ..., 'node_tags': [...]}`` for a node,
        ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}`` for a way,
        and ``None`` for any other element.
    """
    tags = []  # Secondary tag rows for the current node or way
    element_id = element.attrib.get('id')

    if element.tag == 'node':
        node_attribs = {}
        for attr in element.attrib:
            if attr in node_attr_fields:
                node_attribs[attr] = element.attrib[attr]

        for child in element:
            split = _split_secondary_key(child.attrib['k'], problem_chars,
                                         default_tag_type)
            if split is None:
                continue
            tags.append({
                'id': element.attrib['id'],
                'type': split[0],
                'key': split[1],
                'value': child.attrib['v'],
            })

        return {'node': node_attribs, 'node_tags': tags}

    elif element.tag == 'way':
        way_attribs = {}
        for attr in element.attrib:
            if attr in way_attr_fields:
                way_attribs[attr] = element.attrib[attr]

        way_nodes = []
        position = 0
        for child in element:
            if child.tag == 'tag':
                raw_key = child.attrib['k']
                split = _split_secondary_key(raw_key, problem_chars,
                                             default_tag_type)
                if split is None:
                    continue
                value = child.attrib['v']
                # Clean the two audited address fields; the exact-key tests
                # keep each tag on a single path so it is appended only once.
                if raw_key == 'addr:street':
                    value = update_name(value, mapping)
                elif raw_key == 'addr:postcode':
                    value = update_postal_code(value, zip_mapping)
                tags.append({
                    'id': element.attrib['id'],
                    'type': split[0],
                    'key': split[1],
                    'value': value,
                })

            elif child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': position,
                })
                position += 1

        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Esempio n. 23
0
File: data.py Progetto: nic2/nano
def shape_element(element):
    """Shape a node or way element into a document dict, or reject it.

    Returns None for anything that is not a node or way, for elements tagged
    ``addr:country=PL`` and for elements whose ``addr:postcode`` is not an
    integer in 10115..14999.  Otherwise returns a dict holding the element's
    attributes, a ``created`` sub-dict, a ``pos`` pair, its tags and, when
    present, ``address`` and ``node_refs``.  Cleaned tag values are also
    written back to the tag element's ``v`` attribute.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    created = {}
    lat = lon = 0.0
    for name, raw in element.attrib.items():
        if name in CREATED:
            created[name] = raw
        elif name == 'lat':
            lat = float(raw)
        elif name == 'lon':
            lon = float(raw)
        else:
            node[name] = raw
    node['created'] = created
    node['pos'] = [lat, lon]

    tag_elements = element.findall('tag')

    # First pass: reject the whole element when it lies in Poland or carries
    # a postcode outside the accepted range (or a non-numeric one).
    for tag in tag_elements:
        tag_key = tag.attrib['k']
        if tag_key == 'addr:country' and tag.attrib['v'] == 'PL':
            return None
        if tag_key == 'addr:postcode':
            try:
                postcode = int(tag.attrib['v'])
            except ValueError:
                return None
            if not 10115 <= postcode < 15000:
                return None

    # Second pass: copy the tags, routing addr:* keys into the address dict.
    address = {}
    for tag in tag_elements:
        tag_key = tag.attrib['k']
        if re.search(problemchars, tag_key):
            continue
        if 'addr:street:' in tag_key:
            continue

        if re.search(lower_colon, tag_key):
            if 'addr:' not in tag_key:
                node[tag_key] = tag.attrib['v']
                continue
            if tag_key == 'addr:housenumber':
                tag.attrib['v'] = audit.update_housenumber(tag.attrib['v'])
            elif tag_key == 'addr:street':
                tag.attrib['v'] = audit.update_name(tag.attrib['v'])
            # Values emptied by the cleaners are dropped
            if tag.attrib['v']:
                address[tag_key.replace('addr:', '')] = tag.attrib['v']
        else:
            if tag_key == 'phone':
                tag.attrib['v'] = audit.update_phonenumber(tag.attrib['v'])
            if tag.attrib['v']:
                node[tag_key] = tag.attrib['v']

    if address:
        node['address'] = address

    refs = [nd.attrib['ref'] for nd in element.findall('nd')]
    if refs:
        node['node_refs'] = refs
    return node
def _shape_audited_tag(child, parent_atts, fields, default_tag_type):
    """Build the row dict for one <tag> child, cleaning its value via audit."""
    child_atts = child.attrib
    row = {}
    for field in fields:
        if field == 'id':
            row[field] = parent_atts[field]
        elif field == 'key':
            # A prefixed key 'type:rest' is split on the first colon only
            if ':' in child_atts['k']:
                row['type'], row[field] = child_atts['k'].split(':', 1)
            else:
                row[field] = child_atts['k']
                row['type'] = default_tag_type
        elif field == 'value':
            if audit.is_street_name(child):
                row[field] = audit.update_name(child_atts['v'], audit.mapping)
            elif audit.is_up_for_fixing(child):
                row[field] = audit.fix(child_atts['v'])
            else:
                row[field] = child_atts['v']
    return row


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Every child is shaped exactly once.  Children are processed after the
    attribute loop, not inside it, so rows are no longer duplicated once per
    attribute field.

    Args:
        element: XML element; only ``node`` and ``way`` elements are shaped.
        node_attr_fields: attribute names copied from a node; a missing
            ``user`` becomes ``'NO_USER'`` and a missing ``uid`` becomes 0.
        way_attr_fields: attribute names copied from a way.
        problem_chars: accepted for interface compatibility; not used here.
        default_tag_type: ``type`` given to tags whose key has no colon.

    Returns:
        ``{'node': ..., 'node_tags': [...]}`` for a node,
        ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}`` for a way,
        and ``None`` for any other element.

    Raises:
        KeyError: if a required attribute is missing from the element.
    """
    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []

    if element.tag == 'node':
        node_atts = element.attrib
        for field in node_attr_fields:
            if field not in node_atts and field == 'user':
                node_attribs[field] = 'NO_USER'
            elif field not in node_atts and field == 'uid':
                node_attribs[field] = 0
            else:
                node_attribs[field] = node_atts[field]

        # Iterating the element replaces the removed Element.getchildren()
        for child in element:
            tags.append(_shape_audited_tag(child, node_atts, NODE_TAGS_FIELDS,
                                           default_tag_type))
        return {'node': node_attribs, 'node_tags': tags}

    elif element.tag == 'way':
        way_atts = element.attrib
        for field in way_attr_fields:
            way_attribs[field] = way_atts[field]

        position = 0
        for child in element:
            if child.tag == 'tag':
                tags.append(_shape_audited_tag(child, way_atts,
                                               WAY_TAGS_FIELDS,
                                               default_tag_type))
            elif child.tag == 'nd':
                row = {}
                for field in WAY_NODES_FIELDS:
                    if field == 'id':
                        row[field] = way_atts[field]
                    elif field == 'node_id':
                        row[field] = child.attrib['ref']
                    else:
                        # Remaining field holds the running nd position
                        row[field] = position
                        position += 1
                way_nodes.append(row)
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Esempio n. 25
0
def shape_element(element):
    """Convert a node or way element into a nested dict; None otherwise.

    Attributes listed in CREATED go under ``created``, those in COORDINATES
    fill the ``pos`` pair (lat first, lon second), and the rest are copied
    as-is.  Tags whose key matches ``problemchars`` are skipped; single-colon
    ``addr`` tags go into ``address``; nd references are collected under
    ``node_refs``.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}

    for name, raw in element.attrib.items():
        if name in CREATED:
            created = node.setdefault('created', {})
            # str() keeps the timestamp JSON-serializable
            created[name] = str(raw) if name == "timestamp" else raw
        elif name in COORDINATES:
            pos = node.setdefault('pos', [None, None])
            if name == 'lat':
                pos[0] = float(raw)
            if name == 'lon':
                pos[1] = float(raw)
        else:
            node[name] = raw

    for tag in element.iter("tag"):
        tag_key = tag.attrib['k']
        if problemchars.search(tag_key):
            continue

        starts_with_addr = tag_key.startswith('addr')
        if lower_colon.search(tag_key) and starts_with_addr:
            # Address tag: store under the part after the first colon
            address = node.setdefault('address', {})
            pieces = tag_key.split(':', 1)
            if audit.is_street_name(tag):
                street = audit.update_name(tag.attrib['v'], audit.mapping_road)
                street = audit.update_direction(street, audit.mapping_directions)
                address[pieces[1]] = street
            else:
                address[pieces[1]] = tag.attrib['v']
        elif not starts_with_addr:
            # Plain tag: never overwrite a key that is already set
            if tag_key not in node:
                node[tag_key] = tag.attrib['v']
        else:
            # Any other 'addr...' key is kept under a 'tag:' prefix
            node["tag:" + tag_key] = tag.attrib['v']

    for nd in element.iter("nd"):
        node.setdefault('node_refs', []).append(nd.attrib['ref'])
    return node
def process_key_and_value(key, value):
    """Return the (key, value) pair after key renaming / street clean-up.

    The key 'addr' is renamed to 'address'; a 'street' value is passed
    through ``street_name_auditor.update_name``.  Anything else is returned
    unchanged.
    """
    if key == 'street':
        cleaned = street_name_auditor.update_name(value, sreet_name_mapping)
        return key, cleaned
    return ('address' if key == 'addr' else key), value
Esempio n. 27
0
# First words of street values that are accepted without any clean-up.
_EXPECTED_STREET_PREFIXES = frozenset([
    "Calle", "CALLE", u"Barrio", u"Centro", "Calleja", "Centro Comercial",
    "Avenida", "Plaza", "Camino", "Estacion", "Parking", "Campus",
    "Carretera", "Glorieta", "Paseo", "Rotonda", "Juan", "Gran", "Dante",
    "Maria", "Pasaje", u'Le\xf3n', u'Comisar\xeda', "Edificio", "Vivero",
    "CARRETERA", "Centro", "Lope", u'pol\xedgono', u'Pol\xedgono',
    "Bajada", "Subida", "Grupo", "Rampa", "Barrio", "AREA", "La", "Acceso",
    "POLIGONO", "Mercado", "Cuesta", u"Urbanizaci\xf3n", "Ernest", "Pol",
    "Puerto", "Jardines", "San", u"Autov\xeda", u"V\xeda",
    "MercaSantander", u"Traves\xeda", u"ISLA", u"Playa", "N-611", "BARRIO",
    "Las",
])

# Street-type corrections handed to update_name.
_STREET_TYPE_FIXES = {
    "C/": "Calle",
    "Barrio": "Barrio",
    "Calle": "Calle",
    "Calles": "Calle",
    "Avenidad": "Avenida",
    "Avda.": "Avenida",
    u"Calla": "Calle",
    "name=Avenida": "Avenida",
    "name=Calle": "Calle",
    "AREA,": "Area",
    "Bajade": "Bajada",
    "Ramapa": "Rampa",
}


def shape_element(element):
    """Shape a node or way element into a nested dict; None otherwise.

    Attributes in CREATED go under ``created``, lat/lon fill ``pos`` and the
    rest are copied as-is.  ``addr:`` tags are stored under ``address`` (with
    street, postcode and housenumber values cleaned unless the value already
    starts with an expected word); other tags are copied to the top level;
    nd references are collected under ``node_refs``.

    Tags are processed once, after the attributes, so an element without
    attributes still gets its ``type`` and its tags.
    """
    # Only two kinds of top-level element are processed: "node" and "way"
    if element.tag != "node" and element.tag != "way":
        return None

    node = {"type": element.tag}
    for key, val in element.attrib.items():
        if key in CREATED:
            node.setdefault("created", {})[key] = val
        elif key == "lat" or key == "lon":
            pos = node.setdefault("pos", [0.0, 0.0])
            pos[0 if key == "lat" else 1] = float(val)
        else:
            node[key] = val

    for tag in element.iter("tag"):
        tag_key = tag.attrib['k']
        tag_val = tag.attrib['v']
        # NOTE(review): ``match`` only tests the start of the key; confirm
        # that this is intended rather than ``search``.
        if problemchars.match(tag_key):
            continue
        if not tag_key.startswith("addr:"):
            node[tag_key] = tag_val
            continue

        address = node.setdefault("address", {})
        addr_key = tag_key[len("addr:"):]
        # Skip addr keys whose remainder still matches lower_colon
        if lower_colon.match(addr_key):
            continue
        if tag_val.split(' ')[0] in _EXPECTED_STREET_PREFIXES:
            address[addr_key] = tag_val
        elif tag_key.endswith("street"):
            address[addr_key] = update_name(tag_val, _STREET_TYPE_FIXES)
        elif tag_key.endswith("postcode"):
            address[addr_key] = update_postcode(tag_val)
        elif tag_key.endswith("housenumber"):
            address[addr_key] = update_housenumber(tag_val)
        else:
            address[addr_key] = tag_val

    for nd in element.iter("nd"):
        node.setdefault("node_refs", []).append(nd.attrib["ref"])

    return node
Esempio n. 28
0
def shape_element(element):
    """Shape a node or way element into a dict and clear the element.

    Returns None for any other element.  Attributes in CREATED go under
    ``created`` (a timestamp additionally contributes ``year`` and ``month``
    slices), lat/lon become the ``pos`` list, and the rest are copied as-is.
    Only ``addr:street`` and ``addr:postcode`` / ``addr:zipcode`` are kept
    from the addr tags; other tags are copied unless their value matches
    ``problemchars``.  ``node_refs`` is added for ways that have nd children.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    created = {}
    pos = []
    for name, raw in element.attrib.items():
        if name == 'timestamp':
            created['year'] = raw[:4]
            created['month'] = raw[5:7]

        if name in CREATED:
            created[name] = raw
        elif name == 'lat':
            # lon is read together with lat so pos is always [lat, lon]
            pos.extend((float(raw), float(element.attrib['lon'])))
        elif name != 'lon':
            node[name] = raw
    node['created'] = created
    node['pos'] = pos

    addr = {}
    refs = []
    for child in element:
        fields = child.attrib
        if child.tag == 'nd':
            refs.append(fields['ref'])
            continue
        if child.tag != 'tag':
            continue

        key = fields['k']
        if key == 'addr:street':
            addr['street'] = audit.update_name(fields['v'], audit.mapping)
        elif key in ('addr:postcode', 'addr:zipcode'):
            addr[key.split(':')[1]] = audit.update_zipcode(fields['v'])
        elif key.startswith('addr:'):
            # Every other addr:* tag is dropped
            continue
        elif not problemchars.search(fields['v']):
            node[key] = fields['v']

    if element.tag == 'way' and refs:
        node['node_refs'] = refs
    if addr:
        node['address'] = addr

    element.clear()
    return node
Esempio n. 29
0
def _shape_checked_tags(element, element_id, problem_chars, default_tag_type):
    """Return the tag rows of *element*, or None if a key has problem chars."""
    rows = []
    for elem in element.iter('tag'):
        k = elem.get('k')
        v = elem.get('v')
        if problem_chars.search(k):
            # NOTE(review): a single problematic key discards the whole
            # element (original behavior); confirm that skipping only this
            # tag is not what was intended.
            return None
        if ':' in k:
            if k == 'addr:street':
                if v is None:
                    continue
                # Store the cleaned name (the result used to be discarded)
                v = update_name(v, mapping)
            elif k == 'addr:postcode' and len(v) != 6:
                # Postcodes that are not 6 characters long are dropped
                continue
            tag_type, key = k.split(':', 1)
        else:
            tag_type, key = default_tag_type, k
        rows.append({
            'id': element_id,
            'key': key,
            'type': tag_type,
            'value': v,
        })
    return rows


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    ``addr:street`` values are replaced by the result of ``update_name``;
    ``addr:postcode`` tags whose value is not 6 characters long and street
    tags without a value are skipped.

    Args:
        element: XML element; only ``node`` and ``way`` elements are shaped.
        node_attr_fields: attribute names copied from a node (all required).
        way_attr_fields: attribute names copied from a way (all required).
        problem_chars: compiled pattern searched in every tag key.
        default_tag_type: ``type`` given to tags whose key has no colon.

    Returns:
        ``{'node': ..., 'node_tags': [...]}`` for a node,
        ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}`` for a way,
        and ``None`` for any other element or when a tag key matches
        ``problem_chars``.

    Raises:
        KeyError: if a listed attribute is missing from the element.
    """
    if element.tag == 'node':
        node_attribs = {}
        for field in node_attr_fields:
            node_attribs[field] = element.attrib[field]

        tags = _shape_checked_tags(element, node_attribs['id'],
                                   problem_chars, default_tag_type)
        if tags is None:
            return None
        return {'node': node_attribs, 'node_tags': tags}

    elif element.tag == 'way':
        way_attribs = {}
        for field in way_attr_fields:
            way_attribs[field] = element.attrib[field]

        way_nodes = []
        for position, nd in enumerate(element.iter('nd')):
            way_nodes.append({
                'id': way_attribs['id'],
                'node_id': nd.get('ref'),
                'position': position,
            })

        tags = _shape_checked_tags(element, way_attribs['id'],
                                   problem_chars, default_tag_type)
        if tags is None:
            return None
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Esempio n. 30
0
def _shape_state_aware_tag(child, element_id, problem_chars, default_tag_type):
    """Return the row dict for one <tag> child, or None if it is skipped."""
    raw_key = child.attrib["k"]
    if problem_chars.match(raw_key):
        return None

    value = child.attrib['v']
    if LOWER_COLON.match(raw_key):
        tag_type, key = raw_key.split(':', 1)
        if raw_key == 'addr:street':
            value = update_name(value)
        elif raw_key == 'addr:state':
            # Call the cleaner once; a falsy result means the state cannot
            # be cleaned and the tag is dropped.
            value = updat_state_name(value, stat_mapping)
            if not value:
                return None
    else:
        tag_type, key = default_tag_type, raw_key

    return {'id': element_id, 'type': tag_type, 'key': key, 'value': value}


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Tags whose key matches ``problem_chars`` are skipped.  ``addr:street``
    values are cleaned with ``update_name`` and ``addr:state`` values with
    ``updat_state_name``; a state tag that cannot be cleaned is skipped.

    Args:
        element: XML element; only ``node`` and ``way`` elements are shaped.
        node_attr_fields: attribute names copied from a node element.
        way_attr_fields: attribute names copied from a way element.
        problem_chars: compiled pattern matched against every tag key.
        default_tag_type: ``type`` given to tags whose key has no prefix.

    Returns:
        ``{'node': ..., 'node_tags': [...]}`` for a node,
        ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}`` for a way,
        and ``None`` for any other element.
    """
    tags = []  # Secondary tag rows for the current node or way

    if element.tag == 'node':
        node_attribs = {}
        for attrib in element.attrib:
            if attrib in node_attr_fields:
                node_attribs[attrib] = element.attrib[attrib]

        for child in element:
            row = _shape_state_aware_tag(child, element.attrib['id'],
                                         problem_chars, default_tag_type)
            if row is not None:
                tags.append(row)

        return {'node': node_attribs, 'node_tags': tags}

    elif element.tag == 'way':
        way_attribs = {}
        for attrib in element.attrib:
            if attrib in way_attr_fields:
                way_attribs[attrib] = element.attrib[attrib]

        way_nodes = []
        position = 0
        for child in element:
            if child.tag == 'tag':
                row = _shape_state_aware_tag(child, element.attrib['id'],
                                             problem_chars, default_tag_type)
                if row is not None:
                    tags.append(row)
            elif child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': position,
                })
                position += 1

        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Esempio n. 31
0
def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element; only elements whose ``tag`` is ``'node'`` or
            ``'way'`` are shaped.
        node_attr_fields: attribute names copied from a node element.
        way_attr_fields: attribute names copied from a way element.
        problem_chars: compiled pattern; a tag whose key it matches (and which
            did not already match ``LOWER_COLON``) is dropped.
        default_tag_type: ``type`` assigned to tags whose key is not split on
            a colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        ``None`` for any other element.
    """

    def shape_tag(tag):
        """Return the row dict for one tag child, or None when it is dropped."""
        raw_key = tag.attrib['k']

        if LOWER_COLON.match(raw_key):
            # "addr:street" -> type "addr", key "street"; only the first
            # colon splits, so later colons stay in the key.
            pieces = raw_key.split(':', 1)
            if raw_key == "addr:street":
                value = audit.update_name(tag.attrib['v'], audit.mapping)
            elif raw_key == "addr:postcode":
                value = audit.update_postcode(tag.attrib['v'])
            else:
                value = tag.attrib['v']
            return {
                'id': element.attrib['id'],
                'key': pieces[1],
                'value': value,
                'type': pieces[0],
            }

        if problem_chars.match(raw_key):
            return None

        return {
            'id': element.attrib['id'],
            'key': raw_key,
            'value': tag.attrib['v'],
            'type': default_tag_type,
        }

    tags = []  # secondary tags are shaped the same way for nodes and ways

    # NODE element
    if element.tag == 'node':
        node_attribs = {
            name: value
            for name, value in element.attrib.items()
            if name in node_attr_fields
        }
        # Every child of a node is treated as a tag (no child.tag check).
        for child in element:
            shaped = shape_tag(child)
            if shaped is not None:
                tags.append(shaped)
        return {'node': node_attribs, 'node_tags': tags}

    # WAY element
    if element.tag == 'way':
        way_attribs = {
            name: value
            for name, value in element.attrib.items()
            if name in way_attr_fields
        }
        way_nodes = []
        nd_pos = 1  # positions of nd references are numbered from 1

        for child in element:
            if child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': nd_pos,
                })
                nd_pos += 1
            elif child.tag == 'tag':
                shaped = shape_tag(child)
                if shaped is not None:
                    tags.append(shaped)

        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Esempio n. 32
0
def process_key_and_value(key, value):
    """Normalize one key/value pair.

    The key ``'addr'`` is renamed to ``'address'``; the value of a
    ``'street'`` key is passed through ``street_name_auditor.update_name``
    with ``street_name_mapping``. Every other pair is returned unchanged.

    Returns:
        A ``(key, value)`` tuple.
    """
    if key == 'street':
        cleaned = street_name_auditor.update_name(value, street_name_mapping)
        return key, cleaned
    if key == 'addr':
        return 'address', value
    return key, value
def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element to shape.
        node_attr_fields: attribute names read from a ``node`` element; each
            one must be present on the element (``KeyError`` otherwise).
        way_attr_fields: attribute names read from a ``way`` element; each
            one must be present on the element (``KeyError`` otherwise).
        problem_chars: compiled pattern; tags whose key it matches are skipped.
        default_tag_type: ``type`` assigned to tags whose key has no colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        ``None`` for any other element.
    """
    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []  # secondary tags are handled the same way for nodes and ways

    position = 0  # index of the next nd reference, counted from 0

    if element.tag == 'node':
        for field in node_attr_fields:
            node_attribs[field] = element.attrib[field]

    if element.tag == 'way':
        for field in way_attr_fields:
            way_attribs[field] = element.attrib[field]

    for child in element:
        element_id = element.attrib['id']

        if child.tag == 'tag':
            raw_key = child.attrib['k']
            if problem_chars.match(raw_key):
                continue

            # Start from the raw value and let each matching cleaner replace
            # it. Previously every cleaned value was overwritten again by the
            # raw value, so no cleaning ever reached the output. If a key
            # matches more than one cleaner, the later one wins.
            value = child.attrib['v']
            if 'street' in raw_key:
                value = update_name(child.attrib['v'], mapping)
            if 'postcode' in raw_key:
                value = update_postcode(child.attrib['v'])
            if 'city' in raw_key:
                value = update_city(child.attrib['v'], city_mapping)

            if ':' in raw_key:
                # Split on the first colon only: "addr:street" -> type
                # "addr", key "street".
                tag_type, _, tag_key = raw_key.partition(':')
            else:
                tag_type, tag_key = default_tag_type, raw_key

            tags.append({
                'id': element_id,
                'value': value,
                'type': tag_type,
                'key': tag_key,
            })

        elif child.tag == 'nd':
            way_nodes.append({
                'id': element_id,
                'node_id': child.attrib['ref'],
                'position': position,
            })
            position += 1

    if element.tag == 'node':
        return {'node': node_attribs, 'node_tags': tags}
    if element.tag == 'way':
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}
    return None
Esempio n. 34
0
def _shape_secondary_tag(element, tag, problem_chars, default_tag_type):
    """Build the row dict for one tag child of ``element``; None if dropped."""
    raw_key = tag.attrib['k']

    if LOWER_COLON.match(raw_key):
        # Only the first colon splits the key into type and key.
        pieces = raw_key.split(':', 1)
        row = {'id': element.attrib['id'], 'key': pieces[1]}
        if raw_key == "addr:street":
            row['value'] = audit.update_name(tag.attrib['v'], audit.mapping)
        elif raw_key == "addr:postcode":
            row['value'] = audit.update_postcode(tag.attrib['v'])
        else:
            row['value'] = tag.attrib['v']
        row['type'] = pieces[0]
        return row

    if problem_chars.match(raw_key):
        return None

    return {
        'id': element.attrib['id'],
        'key': raw_key,
        'value': tag.attrib['v'],
        'type': default_tag_type,
    }


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: XML element; only ``node`` and ``way`` elements are shaped.
        node_attr_fields: attribute names copied from a node element.
        way_attr_fields: attribute names copied from a way element.
        problem_chars: compiled pattern; a tag whose key it matches (and which
            did not already match ``LOWER_COLON``) is dropped.
        default_tag_type: ``type`` assigned to tags whose key is not split on
            a colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for a node,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for a way, and
        ``None`` for any other element.
    """
    tags = []  # secondary tags are shaped the same way for nodes and ways

    # NODE element
    if element.tag == 'node':
        node_attribs = {}
        for attribute in element.attrib:
            if attribute in node_attr_fields:
                node_attribs[attribute] = element.attrib[attribute]

        # Every child of a node is treated as a tag (no child.tag check).
        for child in element:
            row = _shape_secondary_tag(
                element, child, problem_chars, default_tag_type)
            if row is not None:
                tags.append(row)

        return {'node': node_attribs, 'node_tags': tags}

    # WAY element
    if element.tag == 'way':
        way_attribs = {}
        for attribute in element.attrib:
            if attribute in way_attr_fields:
                way_attribs[attribute] = element.attrib[attribute]

        way_nodes = []
        nd_pos = 1  # positions of nd references are numbered from 1

        for child in element:
            if child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': nd_pos,
                })
                nd_pos += 1
            elif child.tag == 'tag':
                row = _shape_secondary_tag(
                    element, child, problem_chars, default_tag_type)
                if row is not None:
                    tags.append(row)

        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Esempio n. 35
0
def shape_element(element):
    """Shape a ``node`` or ``way`` XML element into a nested dict.

    The result holds the element tag under ``'type'``, a ``'pos'`` list of
    ``[lat, lon]`` floats when both attributes are present and parse, a
    ``'created'`` sub-dict for attributes listed in ``CREATED``, and every
    other attribute verbatim. ``tag`` elements are folded in as follows:

    * keys matched by ``problemchars`` are skipped;
    * ``addr:*`` keys go into ``node['address']`` under the part after the
      prefix (skipped when that part still matches ``lower_colon``), with
      street names and zip codes passed through their cleaning helpers;
    * keys starting with ``is_in`` go into ``node['is_in']``;
    * tags accepted by ``audit.is_gnis`` go into ``node['gnis']``;
    * a ``type`` key is stored as ``'tag_type'`` so it does not overwrite the
      element type; any other key is stored under its own name.

    For ways, the ``ref`` of every ``nd`` element found is collected in
    ``'node_refs'``.

    Returns:
        The shaped dict, or ``None`` when ``element`` is neither a node nor
        a way (e.g. relations).
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    created = {}

    # Build the position from lat/lon; elements without usable coordinates
    # (e.g. ways) simply get no 'pos' key.
    try:
        node['pos'] = [float(element.attrib['lat']),
                       float(element.attrib['lon'])]
    except (KeyError, ValueError):
        pass

    # Process attributes of the node/way element itself.
    for attr_name, attr_value in element.attrib.items():
        if attr_name in CREATED:
            # creation metadata lives in its own sub-dictionary
            created[attr_name] = attr_value
            node['created'] = created
        elif attr_name in ('lat', 'lon'):
            continue  # already folded into 'pos'
        else:
            node[attr_name] = attr_value

    # Process 'tag' elements.
    for tag in element.iter('tag'):
        key = tag.attrib['k']

        # Skip tags whose key contains problem characters.
        if problemchars.search(key):
            continue

        # Address tags
        elif key.startswith('addr:'):
            if 'address' not in node:
                node['address'] = {}
            sub_key = key[5:]  # part after the "addr:" prefix
            if lower_colon.search(sub_key):
                continue

            # Clean street names / zip codes where applicable.
            if audit.is_street_name(tag):
                node['address'][sub_key] = audit.update_name(tag.attrib['v'])
            elif audit.is_zip(tag):
                zip_code = clean_zip(tag.attrib['v'])
                node['address'][sub_key] = zip_code
            else:
                node['address'][sub_key] = tag.attrib['v']

        # is_in tags
        elif key.startswith('is_in'):
            if 'is_in' not in node:
                node['is_in'] = {}

            # Split a plain 'is_in' list value into the appropriate keys.
            if key == 'is_in':
                if diego_re.search(tag.attrib['v']):
                    node['is_in']['city'] = 'San Diego'
                if usa_re.search(tag.attrib['v']):
                    node['is_in']['country'] = 'United States of America'
                    node['is_in']['country_code'] = 'US'
                if ca_re.search(tag.attrib['v']):
                    node['is_in']['state'] = 'California'
                    node['is_in']['state_code'] = 'CA'
                if not node['is_in']:
                    # nothing recognised: keep the raw value as the city
                    # (catch for baja and lakeside)
                    node['is_in']['city'] = tag.attrib['v']
            elif lower_colon.search(key):
                sub_key, sub_val = clean_is_in(tag)
                node['is_in'][sub_key] = sub_val

        # GNIS data - including ele tag
        elif audit.is_gnis(tag):
            if 'gnis' not in node:
                node['gnis'] = {}
            sub_key, sub_val = clean_gnis(tag)
            node['gnis'][sub_key] = sub_val

        # All other tags - make sure the element 'type' is not overwritten.
        elif key == 'type':
            node['tag_type'] = tag.attrib['v']
        else:
            node[key] = tag.attrib['v']

    # Node references for ways.
    if element.tag == "way":
        node['node_refs'] = [nd.attrib['ref'] for nd in element.iter('nd')]

    return node
Esempio n. 36
0
def shape_element(element):
    """Shape a top-level ``node``, ``way`` or ``relation`` element into a dict.

    The result carries the element tag as ``'type'``, its ``id``, ``visible``
    when present, a ``'created'`` sub-dict built from the attributes named in
    ``CREATED`` (with ``'timestamp'`` parsed by ``dateutil``), and ``'pos'``
    as ``[lat, lon]`` floats when both attributes exist. Children contribute:

    * ``tag``: skipped when the key matches ``problemchars``; stored under
      its own key when it matches ``lower`` (a key named ``type`` is stored
      as ``'<element tag>_type'`` instead); when it matches ``lower_colon``,
      single-colon ``addr:`` keys go into ``'address'`` and other keys are
      stored under their full name;
    * ``nd``: its ``ref`` is appended to ``'node_refs'``;
    * ``member``: its ``ref``/``role``/``type`` are appended to ``'members'``.

    Returns:
        The shaped dict, or ``None`` for any other element tag.
    """
    if element.tag not in ('node', 'way', 'relation'):
        return None

    attrs = element.attrib

    # General attributes
    node = {"type": element.tag, "id": attrs["id"]}
    if "visible" in attrs:
        node["visible"] = attrs["visible"]

    # Creation metadata; the timestamp string becomes a datetime object
    created = {field: attrs[field] for field in CREATED}
    created['timestamp'] = dateutil.parser.parse(attrs['timestamp'])
    node["created"] = created

    # Position
    if "lat" in attrs and "lon" in attrs:
        node["pos"] = [float(attrs["lat"]), float(attrs["lon"])]

    refs = []
    addr = {}
    member_list = []

    for child in element:
        kind = child.tag

        if kind == "tag":
            k = child.attrib['k']

            # Drop keys containing problematic characters
            if re.search(problemchars, k) is not None:
                continue

            # Keys matching `lower` are stored directly
            if re.search(lower, k) is not None:
                # a tag named 'type' must not overwrite the element type
                target = element.tag + '_type' if k == 'type' else k
                node[target] = child.attrib['v']

            # Keys matching `lower_colon`
            if re.search(lower_colon, k) is not None:
                if not k.startswith("addr:"):
                    node[k] = child.attrib['v']
                elif k.count(":") == 1:
                    # only single-colon address keys are kept
                    v = child.attrib['v']
                    if k == "addr:street":
                        v = audit.update_name(v, audit.mapping)
                    addr[k.split(":")[1]] = v

        elif kind == "nd":
            # node references of a way
            refs.append(child.attrib["ref"])

        elif kind == 'member':
            # members of a relation
            member_list.append({
                'ref': child.attrib['ref'],
                'role': child.attrib['role'],
                'type': child.attrib['type'],
            })

    # Only non-empty collections are attached to the result
    if refs:
        node["node_refs"] = refs
    if addr:
        node["address"] = addr
    if member_list:
        node["members"] = member_list

    return node