def load_new_tag(element, secondary, default_tag_type):
    """Build one tag row to go along with the ways_tags / nodes_tags lists.

    Args:
        element: Parent element; its ``id`` attribute becomes the row id.
        secondary: Child tag element providing the ``k`` and ``v`` attributes.
        default_tag_type: Type used when the key contains no colon.

    Returns:
        dict with the keys ``id``, ``key``, ``type`` and ``value``.
    """
    raw_key = secondary.attrib['k']
    new = {'id': element.attrib['id']}

    if ":" not in raw_key:
        new['key'] = raw_key
        new['type'] = default_tag_type
    else:
        # Split on the first colon only: everything before it is the type,
        # everything after it (further colons included) is the key.
        new['type'], new['key'] = raw_key.split(":", 1)

    # Cleaning and loading values of various keys
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
    elif new['key'] == 'postcode':
        # The old test (k[6:] == 'postcode') could never match
        # "addr:postcode", and the branch then read an undefined name.
        new['value'] = clean_postcode(secondary.attrib['v'])
    else:
        new['value'] = secondary.attrib['v']
    return new
def tag_attributes(element, default_tag_type, child):
    """Build one tag row for *child*, dropping tags with an invalid postcode.

    Args:
        element: Parent element; its ``id`` attribute becomes the row id.
        default_tag_type: Type used when the key contains no colon.
        child: Tag element providing the ``k`` and ``v`` attributes.

    Returns:
        dict with ``id``, ``key``, ``type`` and ``value``, or ``None`` when
        the tag is a postcode that does not match ``POST_CODE`` or is not
        exactly 6 characters long.
    """
    raw_key = child.attrib['k']
    tg = {'id': element.attrib['id']}

    if ':' not in raw_key:
        tg['key'] = raw_key
        tg['type'] = default_tag_type
    else:
        # Only the first colon separates type from key.
        tg['type'], tg['key'] = raw_key.split(':', 1)

    if is_street_name(child):
        tg['value'] = update_name(child.attrib['v'])
    elif tg['key'] == 'postcode':
        pin_code = child.attrib['v']
        # Reject every invalid code so no row is ever returned without a
        # 'value' entry.
        if POST_CODE.match(pin_code) is None or len(pin_code) != 6:
            return None
        tg['value'] = pin_code
    else:
        tg['value'] = child.attrib['v']
    return tg
def add_tag(elem, elem_2, default_tag_type):
    """Build one tag row from a child tag, cleaning street, postcode and phone.

    Args:
        elem: Parent element; its ``id`` attribute becomes the row id.
        elem_2: Child tag element providing the ``k`` and ``v`` attributes.
        default_tag_type: Type used when the key contains no colon.

    Returns:
        dict with the keys ``id``, ``key``, ``type`` and ``value``.
    """
    raw_key = elem_2.attrib['k']
    raw_value = elem_2.attrib['v']
    new_tag = {'id': elem.attrib['id']}

    # Only the first colon separates type from key.
    if ":" in raw_key:
        new_tag['type'], new_tag['key'] = raw_key.split(":", 1)
    else:
        new_tag['key'] = raw_key
        new_tag['type'] = default_tag_type

    if is_street_name(elem_2):
        # Clean the tag *value*; the key was being passed here before, so
        # the stored street name was derived from the key text.
        new_tag['value'] = update_name(raw_value, mapping)
    elif is_postcode(elem_2):
        new_tag['value'] = update_postcode(raw_value)
    elif is_phone(elem_2):
        new_tag['value'] = update_phone(raw_value)
    else:
        new_tag['value'] = raw_value
    return new_tag
def load_new_tag(element, secondary, default_tag_type):
    """Create the dict for one tag, destined for way_tags / node_tags.

    The key is split at its first colon into type and key; keys without a
    colon get *default_tag_type*. Street names go through ``update_name``
    and the province value 'Texas' is abbreviated to 'TX'.
    """
    new = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        new['key'] = remainder
        new['type'] = prefix
    else:
        new['key'] = full_key
        new['type'] = default_tag_type

    # Clean the value depending on what kind of tag this is.
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'], mapping)
    elif new['key'] == 'province':
        raw_province = secondary.attrib['v']
        new['value'] = 'TX' if raw_province == 'Texas' else raw_province
    else:
        new['value'] = secondary.attrib['v']
    return new
def load_new_tag(element, secondary, default_tag_type):
    """Create the dict for one tag, destined for way_tags / node_tags.

    Returns ``None`` instead of a dict when the tag is a phone number that
    ``phone_cleaned`` cannot clean (it returned ``None``).
    """
    new = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        new['key'] = remainder
        new['type'] = prefix
    else:
        new['key'] = full_key
        new['type'] = default_tag_type

    # Clean the value depending on what kind of tag this is.
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
        return new
    if new['key'] != 'phone':
        new['value'] = secondary.attrib['v']
        return new

    phone_num = phone_cleaned(secondary.attrib['v'])
    if phone_num is None:
        return None
    new['value'] = phone_num
    return new
def shape_element(element):
    """Turn a "node" or "way" element into a nested dict; None otherwise.

    Copies id / type / visible, the CREATED attributes and the position,
    nests single-colon ``addr:`` tags under 'address' (street names are
    cleaned through ``audit.update_name``) and lists nd refs in 'node_refs'.
    """
    if element.tag not in ("node", "way"):
        return None

    attrs = element.attrib
    node = {'id': attrs['id'], 'type': element.tag}
    if 'visible' in attrs:
        node['visible'] = attrs['visible']
    node['created'] = {}
    for field in CREATED:
        node['created'][field] = attrs[field]
    if 'lat' in attrs:
        node['pos'] = [float(attrs['lat']), float(attrs['lon'])]

    if element.find("tag") is not None:
        for tag in element.iter("tag"):
            key = tag.attrib['k']
            is_addr = key.startswith("addr:")
            if is_addr and lower_colon.match(key):
                address = node.setdefault('address', {})
                address[key.split(":")[1]] = tag.attrib['v']
                if audit.is_street_name(tag):
                    street, st_type = audit.update_name(
                        tag.attrib['v'], audit.mapping)
                    # Update the street name
                    address['street'] = street
                    # Only keep the street type when it is an expected one
                    if st_type in audit.expected:
                        address['st_type'] = st_type
            elif not is_addr and lower.match(key):
                node[key] = tag.attrib['v']

    if element.find("nd") is not None:
        node["node_refs"] = [nd.attrib['ref'] for nd in element.iter("nd")]
    return node
def shape_element(element):
    """Shape a "node" or "way" element into a dict; return None otherwise.

    The result always carries 'pos' (placeholders "Lat"/"Lon" until the
    attributes are seen), 'created', 'node_refs', 'address' and 'osm_type'.
    Attributes whose value matches ``problemchars`` are skipped. Address
    tags with exactly one colon are stored under 'address'; street names are
    passed through ``update_name`` with ``la_mapping``.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {
        "pos": ["Lat", "Lon"],
        "created": {},
        "node_refs": [],
        "address": {},
        "osm_type": element.tag,
    }

    # Iterate through element attributes to check for validity
    for key, item in element.items():
        if problemchars.match(item):
            continue
        if key in CREATED:
            node["created"][key] = item
        elif key == "lat":
            node["pos"][0] = float(item)
        elif key == "lon":
            node["pos"][1] = float(item)
        else:
            node[key] = item

    # Iterate through children (assumes only tag or nd children exist)
    for child in element:
        if child.tag == "tag":
            key = child.get("k")
            item = child.get("v")
            if "addr:" not in key:
                node[key] = item
                continue
            if key.count(":") >= 2:
                # Skip this tag only; a break here would also discard every
                # remaining tag and nd child of the element.
                continue
            sub_key = key.split(":")[1]
            # If street address, change the suffix using la_mapping
            if sub_key == "street":
                item = update_name(item, la_mapping)
            try:
                node["address"][sub_key] = item
            except TypeError:
                # node["address"] is no longer a dict, e.g. because a plain
                # "address" tag or attribute overwrote it earlier.
                print("error")
            if sub_key == "postcode":
                # NOTE(review): the returned value is not stored, as in the
                # original; presumably zip_update edits node itself - confirm.
                item = zip_update(item, node, zip_mapping)
        elif child.tag == "nd":
            node["node_refs"].append(child.get("ref"))
    return node
def load_new_tag(element, secondary, default_tag_type):
    """Create the dict for one tag, destined for way_tags / node_tags.

    Returns ``None`` for a phone number ``update_phone_num`` rejects and for
    a postcode that neither matches ``POSTCODE`` nor starts with "14174".
    """
    new = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        new['key'] = remainder
        new['type'] = prefix
    else:
        new['key'] = full_key
        new['type'] = default_tag_type

    # Clean the value depending on what kind of tag this is.
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
    elif new['key'] == 'phone':
        phone_num = update_phone_num(secondary.attrib['v'])
        if phone_num is None:
            return None
        new['value'] = phone_num
    elif new['key'] == 'province':
        # Change Ontario to ON
        raw_province = secondary.attrib['v']
        new['value'] = 'ON' if raw_province == 'Ontario' else raw_province
    elif new['key'] == 'postcode':
        post_code = secondary.attrib['v'].strip()
        if POSTCODE.match(post_code) is not None:
            # Add a space in the middle if there is none, then upper-case.
            if " " not in post_code:
                post_code = post_code[:3] + " " + post_code[3:]
            new['value'] = post_code.upper()
        elif post_code.startswith("14174"):
            # Keep the zip code found in the postal code audit so the
            # document can be located for deletion later.
            new['value'] = post_code
        else:
            # Ignore the tag if the postal code format is improper.
            return None
    else:
        new['value'] = secondary.attrib['v']
    return new
def insert_address(node, address_name_tokens, tag):
    """Store the value of *tag* under node["address"] and return *node*.

    Every token after the first in *address_name_tokens* becomes a key of
    the address dict; for the token "street" the value is first cleaned
    with ``audit.update_name``.
    """
    address = node.setdefault("address", {})
    for part in address_name_tokens[1:]:
        raw_value = tag.attrib["v"]
        if part == "street":
            address[part] = audit.update_name(raw_value, mapping)
        else:
            address[part] = raw_value
    return node
def shape_element(element):
    """Shape a "node" or "way" element into a dict; return None otherwise.

    CREATED attributes are grouped under 'created', lat/lon become 'pos',
    single-colon ``addr:`` tags are grouped under 'address' and nd refs are
    listed under 'node_refs'. Tags whose key contains problem characters or
    more than one colon are skipped, and street values listed in
    ``improper_address`` are printed and left out.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    created = {}
    position = [0, 0]
    has_pos = False
    for name, value in element.attrib.items():
        if name in CREATED:
            created[name] = value
        elif name == 'lat':
            has_pos = True
            position[0] = float(value)
        elif name == 'lon':
            has_pos = True
            position[1] = float(value)
        else:
            node[name] = value

    address = {}
    has_address = False
    for tag in element.iter('tag'):
        key = tag.get('k')
        value = tag.get('v')
        if problemchars.search(key) is not None or len(key.split(":")) > 2:
            continue
        if 'addr:' not in key:
            node[key] = value
            continue
        has_address = True
        field = key.split(":")[1]
        if field != 'street':
            address[field] = value
        elif value not in improper_address:
            address[field] = update_name(value)
        else:
            # An "improper" street value: report it and skip this tag.
            print(value)

    refs = [nd.get('ref') for nd in element.iter('nd')]
    if refs:
        node['node_refs'] = refs
    node['created'] = created
    if has_pos:
        node['pos'] = position
    if has_address:
        node['address'] = address
    return node
def shape_element(element):
    """Shape a "node" or "way" element into a dict; return None otherwise.

    The dict holds id, type, visible and a 'created' sub-dict, plus:
      * 'address' with housenumber / postcode / street (street names are
        cleaned with ``update_name``),
      * the plain tags amenity, cuisine, name and phone,
      * 'node_refs' with the ref of every nd descendant,
      * 'pos' as [lat, lon] when both attributes are present and non-empty.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {
        "id": element.attrib['id'],
        "type": element.tag,
        "visible": element.get("visible"),
        "created": {
            "version": element.get("version"),
            "changeset": element.get("changeset"),
            "timestamp": element.get("timestamp"),
            "user": element.get("user"),
            "uid": structure_single_user_id(element.get("uid")),
        },
    }

    address_fields = {
        "addr:housenumber": "housenumber",
        "addr:postcode": "postcode",
        "addr:street": "street",
    }
    plain_keys = ("amenity", "cuisine", "name", "phone")

    if element.find("tag") is not None:
        for tag in element.iter("tag"):
            key = tag.attrib['k']
            value = tag.attrib['v']
            # Create the address dict only once. Re-creating it for every
            # address tag discarded the fields collected from earlier tags.
            if key in ADDRESS:
                node.setdefault("address", {})
            if key in address_fields:
                if key == "addr:street":
                    value = update_name(value, mapping)
                node.setdefault("address", {})[address_fields[key]] = value
            elif key in plain_keys:
                node[key] = value

    for nd_elem in element.iter("nd"):
        node.setdefault('node_refs', []).append(nd_elem.get("ref"))

    if element.get("lat") and element.get("lon"):
        node["pos"] = [float(element.get("lat")), float(element.get("lon"))]
    return node
def shape_element(element):
    """Shape a "node" or "way" element into a dict; return None otherwise.

    'created' and 'pos' are always present ('pos' stays empty when the
    element has no lat/lon). 'address' is kept only when at least one of
    housenumber / postcode / street was found, and 'node_refs' only for
    ways that actually contain nd children.
    """
    if element.tag not in ("node", "way"):
        return None

    attrs = element.attrib
    node = {"created": {}, "address": {}, "pos": []}
    if "id" in attrs:
        node["id"] = attrs["id"]
    node["type"] = element.tag
    if "visible" in attrs:
        node["visible"] = attrs["visible"]
    for field in CREATED:
        if field in attrs:
            node["created"][field] = attrs[field]
    for coord in ("lat", "lon"):
        if coord in attrs:
            node["pos"].append(float(attrs[coord]))

    for tag in element.iter("tag"):
        key = tag.attrib['k']
        if problemchars.search(key):
            continue
        value = tag.attrib['v']
        if key == "addr:housenumber":
            node["address"]["housenumber"] = value
        elif key == "addr:postcode":
            node["address"]["postcode"] = audit.update_postcode(value)
        elif key == "addr:street":
            node["address"]["street"] = audit.update_name(value)
        elif key.find("addr") == -1:
            # Any key mentioning "addr" that is not handled above is dropped.
            node[key] = value

    if not node["address"]:
        del node["address"]

    # nd children are only collected for ways
    if element.tag == "way":
        refs = [nd.attrib["ref"] for nd in element.iter("nd")]
        if refs:
            node["node_refs"] = refs
    return node
def shape_element(element):
    """Convert a "node" or "way" element to a dict; None for anything else.

    Address tags (``addr:`` prefix, matching ``lower_colon``) are nested
    under 'address'; a street tag additionally stores the cleaned name and,
    when it is one of ``audit.expected``, its street type under 'st_type'.
    Other tags matching ``lower`` become top-level keys.
    """
    if not (element.tag == "node" or element.tag == "way"):
        return None

    source = element.attrib
    node = {}
    node['id'] = source['id']
    node['type'] = element.tag
    if 'visible' in source:
        node['visible'] = source['visible']

    created = node['created'] = {}
    for name in CREATED:
        created[name] = source[name]

    if 'lat' in source:
        latitude = float(source['lat'])
        longitude = float(source['lon'])
        node['pos'] = [latitude, longitude]

    has_tags = element.find("tag") is not None
    for tag in (element.iter("tag") if has_tags else ()):
        tag_key = tag.attrib['k']
        tag_value = tag.attrib['v']
        if tag_key.startswith("addr:"):
            if not lower_colon.match(tag_key):
                continue
            if 'address' not in node:
                node['address'] = {}
            node['address'][tag_key.split(":")[1]] = tag_value
            if not audit.is_street_name(tag):
                continue
            cleaned, st_type = audit.update_name(tag_value, audit.mapping)
            # Update the street name
            node['address']['street'] = cleaned
            # Insert st_type for analysis, unless it is an unexpected one
            if st_type in audit.expected:
                node['address']['st_type'] = st_type
        elif lower.match(tag_key):
            node[tag_key] = tag_value

    if element.find("nd") is not None:
        refs = []
        for nd in element.iter("nd"):
            refs.append(nd.attrib['ref'])
        node["node_refs"] = refs
    return node
def load_new_tag(element, secondary, default_tag_type):
    """Create the dict for one tag, destined for way_tags / node_tags.

    Street, phone, city and postcode values go through their update
    helpers; a phone number for which ``update_phone_num`` returns ``None``
    makes the whole function return ``None``.
    """
    new = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        new['key'] = remainder
        new['type'] = prefix
    else:
        new['key'] = full_key
        new['type'] = default_tag_type

    # Clean the value depending on what kind of tag this is.
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
    elif new['key'] == 'phone':
        phone_num = update_phone_num(secondary.attrib['v'])
        if phone_num is None:
            return None
        new['value'] = phone_num
    elif new['key'] == 'city':
        new['value'] = update_city(secondary.attrib['v'])
    elif new['key'] == 'postcode':
        new['value'] = update_postcode(secondary.attrib['v'])
    else:
        new['value'] = secondary.attrib['v']
    return new
def shape_element(element):
    """Shape a "node" or "way" element into a dict; return None otherwise.

    The result holds id, type, optional visible and pos, a 'created'
    sub-dict, 'node_refs' (always present, possibly empty) and every tag
    whose key has no problem characters. Address tags without a second
    colon are grouped under 'address', with street names cleaned by
    ``audit.update_name``. The shaped dict is printed before it is returned.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {"id": element.get("id"), "type": element.tag}
    if element.get('visible') is not None:
        node['visible'] = element.get('visible')
    if element.get("lat") and element.get("lon"):
        node["pos"] = [float(element.get("lat")), float(element.get("lon"))]
    node["created"] = {
        "version": element.get("version"),
        "changeset": element.get("changeset"),
        "timestamp": element.get("timestamp"),
        "user": element.get("user"),
        "uid": element.get("uid"),
    }
    node["node_refs"] = [nd.get("ref") for nd in element.findall('nd')]

    address = {}
    for t in element.findall('tag'):
        k = t.get("k")
        if problemchars.search(k):
            continue
        if is_address.search(k) and not second_colon.search(k):
            if is_street.search(k):
                address["street"] = audit.update_name(t.get("v"))
            else:
                address[k.split(":")[-1]] = t.get("v")
        else:
            node[k] = t.get("v")
    if address:
        node["address"] = address

    # Kept from the original: every shaped element is echoed to stdout.
    print(node)
    return node
def update_dict(elem):
    """Return the cleaned value for a postcode / street tag, or False.

    Only 'addr:postcode' (via ``update_zip``) and 'addr:street' (via
    ``update_name``) are considered. False is returned for every other key
    and whenever cleaning leaves the value unchanged.
    """
    cleaners = {
        'addr:postcode': lambda raw: update_zip(raw),
        'addr:street': lambda raw: update_name(raw, mapping),
    }
    clean = cleaners.get(elem.attrib['k'])
    if clean is None:
        return False

    cleaned = clean(elem.attrib['v'])
    # False signals that the dictionary does not need to be updated.
    if cleaned == elem.attrib['v']:
        return False
    return cleaned
def shape_element(element):
    """Shape a "node" or "way" element into a dict; return None otherwise.

    'visible' is lower-cased, CREATED attributes are grouped under
    'created', and ``addr:`` tags without a further colon are grouped under
    'address' (street and city values are cleaned through the audit
    module). nd refs end up in 'node_refs' when there are any.
    """
    if element.tag not in ("node", "way"):
        return None

    attrs = element.attrib
    node = {'id': attrs['id'], 'type': element.tag}
    if 'visible' in attrs:
        node['visible'] = str(attrs["visible"]).lower()

    created = {}
    for name in CREATED:
        if name in attrs and attrs[name] is not None:
            created[name] = attrs[name]
    if 'lat' in attrs and 'lon' in attrs:
        node['pos'] = [float(attrs['lat']), float(attrs['lon'])]
    node['created'] = created

    addr = {}
    for tag in element.iter("tag"):
        key = tag.attrib['k']
        if "addr:" not in key:
            node[key] = tag.attrib['v']
            continue
        field = key.replace('addr:', '')
        if ":" in field:
            # Address keys with a further colon are ignored.
            continue
        if field == "street":
            # Fix unexpected street name
            addr[field] = audit.update_name(tag.attrib['v'], audit.mapping)
        elif field == "city":
            addr[field] = audit.update_name_city(
                tag.attrib['v'], audit.mapping_city)
        else:
            addr[field] = tag.attrib['v']
    if addr:
        node['address'] = addr

    refs = [nd.attrib['ref'] for nd in element.iter("nd")]
    if refs:
        node['node_refs'] = refs
    return node
def process_children(node, element):
    """Fill *node* in place with the tag and nd children of *element*.

    Tags whose key is listed in ``ADDRESS`` are collected under
    node['address'], keyed by the text after the first five characters of
    the key (presumably the "addr:" prefix - confirm against ``ADDRESS``).
    Street names are cleaned with ``update_name``. Other tags that do not
    start with ``ADDRESS_PREFIX`` become top-level keys, with dots removed
    from the key. Attribute values of nd children are collected in
    node['node_refs'].

    Returns:
        None; the result is written into *node*.
    """
    address = {}
    node_refs = []

    for child in element:
        if child.tag == 'tag':
            key = child.attrib['k']
            if key in ADDRESS:
                # The previous guard also tested "is_street_name in mapping",
                # which is never true for a function object, so street names
                # were never cleaned.
                if is_street_name(child):
                    address[key[5:]] = update_name(child.attrib['v'], mapping)
                else:
                    address[key[5:]] = child.attrib['v']
            elif not key.startswith(ADDRESS_PREFIX):
                if '.' in key:
                    key = remove_dots(key)
                node[key] = child.attrib['v']
        elif child.tag == 'nd':
            node_refs.extend(child.attrib.values())

    # insert node refs and address only when applicable
    if node_refs:
        node['node_refs'] = node_refs
    if address:
        node['address'] = address
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names copied for a node.
        way_attr_fields: Attribute names copied for a way.
        problem_chars: Compiled regex; tags whose key contains a match (and
            is not a ``LOWER_COLON`` key) are skipped.
        default_tag_type: Tag type for keys that do not match LOWER_COLON.

    Returns:
        ``{'node': ..., 'node_tags': [...]}`` for a node,
        ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}`` for a way,
        and ``None`` for any other element.
    """
    if element.tag == 'node':
        attr_fields = node_attr_fields
    elif element.tag == 'way':
        attr_fields = way_attr_fields
    else:
        return None

    attribs = {}
    for name in element.attrib:
        if name in attr_fields:
            attribs[name] = element.attrib[name]

    tags = []
    way_nodes = []
    for child in element:
        if child.tag == 'tag':
            raw_key = child.attrib['k']
            if LOWER_COLON.match(raw_key):
                tag_type, tag_key = raw_key.split(':', 1)
            elif problem_chars.search(raw_key):
                # search(), not match(): a problem character anywhere in
                # the key disqualifies it, not only in first position.
                continue
            else:
                tag_type, tag_key = default_tag_type, raw_key
            value = child.attrib['v']
            # Street names are cleaned for node and way tags alike.
            if raw_key == 'addr:street':
                value = update_name(value, mapping)
            tags.append({
                'id': element.attrib['id'],
                'key': tag_key,
                'value': value,
                'type': tag_type,
            })
        elif child.tag == 'nd' and element.tag == 'way':
            way_nodes.append({
                'id': element.attrib['id'],
                'node_id': child.attrib['ref'],
                # 0-based position of this nd within the way
                'position': len(way_nodes),
            })

    if element.tag == 'node':
        return {'node': attribs, 'node_tags': tags}
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def colon_clean(key, val, node):
    """Shape and clean one tag whose key contains a ':' and store it in node.

    The prefix of *key* selects the handling:

    * ``name:``  - stored in node['other_names'] under the remaining key.
    * ``addr:``  - value is passed through normalize_capitalization(); keys
      for which lower_colon finds no match are stored in node['address'],
      with city and street values run through update_city() / update_name().
    * ``tiger:`` - stored in node['tiger'], except 'mtfcc' which is skipped.
    * ``gnis:``  - stored in node['gnis'].
    * keys matched by skip_colon - reported on stdout and not stored.
    * anything else - split at the first colon into an outer key and a
      nested key and stored as node[outer][nested], unless the nested key is
      one of RESERVED_KEYS (only 'note:address' is kept, as a street).

    Args:
        key: Full tag key; the final branch calls key.split(":", 1) and
            therefore needs a colon in it.
        val: Tag value.
        node: Dict being built for the element; modified in place.

    Returns:
        The same *node* object.
    """
    if len(key) > 5 and key[:5] == 'name:':
        # Collect alternate names (or names in different languages),
        # keyed by what follows the "name:" prefix
        key = key[5:]
        if 'other_names' not in node.keys():
            node['other_names'] = {key:val}
        else:
            node['other_names'].update( {key:val} )
    elif len(key) > 5 and key[:5] == 'addr:':
        # For all address values, capitalize only the first character of each word
        # (per the original note; normalize_capitalization is defined elsewhere)
        val = normalize_capitalization(val)
        key = key[5:]
        if lower_colon.search(key) is None:
            if key == "city":
                fixed_city = update_city(val, city_mapping)
                if fixed_city != val:
                    print val, "=>", fixed_city
                    val = fixed_city
            # For any other address key, make sure the address has a city,
            # falling back to default_city (the original note names
            # 'San Francisco' as that default)
            elif "address" not in node.keys():
                node['address'] = {'city':default_city}
            elif "city" not in node['address'].keys():
                node['address'].update({'city':default_city})
            if key == "street":
                fixed_name = update_name(val, mapping)
                if fixed_name != val:
                    print val, "=>", fixed_name
                    val = fixed_name
            if 'address' not in node.keys():
                node['address'] = {key:val}
            else:
                node['address'].update({key:val})
        # Keys starting with "addr:" for which lower_colon matches the
        # remainder are ignored (original note: keys with more than 1 colon)
    elif len(key) > 6 and key[:6] == 'tiger:':
        key = key[6:]
        # Skips 'Tiger:MTFCC' keys
        if key == 'mtfcc':
            return node
        if 'tiger' not in node.keys():
            node['tiger'] = {key:val}
        else:
            node['tiger'].update({key:val})
    elif len(key) > 5 and key[:5] == 'gnis:':
        key = key[5:]
        if 'gnis' not in node.keys():
            node['gnis'] = {key:val}
        else:
            node['gnis'].update({key:val})
    elif skip_colon.search(key) is not None:
        # These keys have garbage values, don't store them
        # Examples of keys that are skipped:
        # 'redwood_city_ca:addr_id', 'rwc_ca:buildingid', 'paloalto_ca:id'
        # 'gosm:sig:8CBDE645', 'massgis:cat'
        print 'Skip colon match: %s=%s'%(key,val)
        return node
    else:
        dict_key,nested_key = key.split(":",1)
        # Only create dict with first part of key
        if nested_key in RESERVED_KEYS:
            # Key 'note:address' contains a street address, save it
            if dict_key == 'note' and nested_key == 'address':
                val = update_name(val, mapping)
                # Never overwrite an existing street: fall back to the
                # 'street_address' key when 'street' is already taken
                if 'address' not in node.keys():
                    node['address'] = {'street':val}
                elif 'street' not in node['address'].keys():
                    node['address'].update({'street':val})
                else:
                    node['address'].update({'street_address':val})
            # Every other reserved nested key (e.g. 'source:name') is skipped
            return node
        #if debug: print "Other (%s) key: %s, val: %s"%(key,nested_key,val)
        if dict_key not in node.keys():
            node[dict_key] = {nested_key:val}
        else:
            if isinstance(node[dict_key],dict):
                node[dict_key].update({nested_key:val})
            else:
                # Convert to dict (use orig outer dict_key with '_key'
                # appended as the nested key for the original value
                node[dict_key] = {dict_key+'_key':node[dict_key], nested_key:val}
    return node
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names copied for a node.
        way_attr_fields: Attribute names copied for a way.
        problem_chars: Compiled regex; tags whose key contains a match (and
            is not a ``LOWER_COLON`` key) are skipped.
        default_tag_type: Tag type for keys that do not match LOWER_COLON.

    Returns:
        ``{'node': ..., 'node_tags': [...]}`` for a node,
        ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}`` for a way,
        and ``None`` for any other element.
    """
    if element.tag == 'node':
        attr_fields = node_attr_fields
    elif element.tag == 'way':
        attr_fields = way_attr_fields
    else:
        return None

    attribs = {}
    for attr in element.attrib:
        if attr in attr_fields:
            attribs[attr] = element.attrib[attr]

    # Secondary tags are handled the same way for node and way elements.
    tags = []
    way_nodes = []
    for child in element:
        if child.tag == 'tag':
            raw_key = child.attrib['k']
            if LOWER_COLON.match(raw_key):
                tag_type, tag_key = raw_key.split(':', 1)
            elif problem_chars.search(raw_key):
                # search(), not match(): a problem character anywhere in
                # the key disqualifies it, not only in first position.
                continue
            else:
                tag_type, tag_key = default_tag_type, raw_key

            value = child.attrib['v']
            # Only street and postcode values are cleaned. Every other
            # "addr:" tag is kept as is; on ways they used to be dropped.
            if raw_key == 'addr:street':
                value = update_name(value, mapping)
            elif raw_key == 'addr:postcode':
                value = update_postal_code(value, zip_mapping)

            tags.append({
                'id': element.attrib['id'],
                'key': tag_key,
                'value': value,
                'type': tag_type,
            })
        elif child.tag == 'nd' and element.tag == 'way':
            way_nodes.append({
                'id': element.attrib['id'],
                'node_id': child.attrib['ref'],
                # 0-based position of this nd within the way
                'position': len(way_nodes),
            })

    if element.tag == 'node':
        return {'node': attribs, 'node_tags': tags}
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def shape_element(element):
    """Shape a "node" or "way" element into a dict, or return None.

    None is returned for other element types, for elements tagged with
    country 'PL', and for elements whose postcode is not an integer in
    [10115, 15000). House numbers, street names and phone numbers are
    cleaned through the audit module; the cleaned value is also written
    back to the tag element, and tags left with an empty value are dropped.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    created = {}
    lat = 0.0
    lon = 0.0
    for name in element.attrib.keys():
        if name in CREATED:
            created[name] = element.attrib[name]
        elif name == 'lat':
            lat = float(element.attrib['lat'])
        elif name == 'lon':
            lon = float(element.attrib['lon'])
        else:
            node[name] = element.attrib[name]
    node['created'] = created
    node['pos'] = [lat, lon]

    tags = element.findall('tag')

    # Filter out anything situated in Poland or with a postcode that does
    # not belong to Berlin or its near surroundings.
    for kv in tags:
        if kv.attrib['k'] == 'addr:country' and kv.attrib['v'] == 'PL':
            return None
        if kv.attrib['k'] == 'addr:postcode':
            try:
                postcode = int(kv.attrib['v'])
            except ValueError:
                return None
            if not 10115 <= postcode < 15000:
                return None

    address = {}
    for kv in tags:
        key = kv.attrib['k']
        if re.search(problemchars, key) or 'addr:street:' in key:
            continue
        if not re.search(lower_colon, key):
            if key == 'phone':
                kv.attrib['v'] = audit.update_phonenumber(kv.attrib['v'])
            if kv.attrib['v']:
                node[key] = kv.attrib['v']
        elif 'addr:' in key:
            if key == 'addr:housenumber':
                kv.attrib['v'] = audit.update_housenumber(kv.attrib['v'])
            if key == 'addr:street':
                kv.attrib['v'] = audit.update_name(kv.attrib['v'])
            if kv.attrib['v']:
                address[re.sub(r'addr:', '', key)] = kv.attrib['v']
        else:
            node[key] = kv.attrib['v']
    if address:
        node['address'] = address

    refs = [nd.attrib['ref'] for nd in element.findall('nd')]
    if refs:
        node['node_refs'] = refs
    return node
def shape_element(element, node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS, problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names copied for a node; a missing
            'user' becomes 'NO_USER' and a missing 'uid' becomes 0.
        way_attr_fields: Attribute names copied for a way (all required).
        problem_chars: Unused here; kept for interface compatibility.
        default_tag_type: Tag type for keys without a colon.

    Returns:
        ``{'node': ..., 'node_tags': [...]}`` for a node,
        ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}`` for a way,
        and ``None`` for any other element.
    """

    def shape_tag(child, parent_id, fields):
        # Build one tag row, filling only the columns named in *fields*.
        row = {}
        for field in fields:
            if field == 'id':
                row[field] = parent_id
            elif field == 'key':
                raw_key = child.attrib['k']
                if ':' in raw_key:
                    row['type'], row[field] = raw_key.split(':', 1)
                else:
                    row[field] = raw_key
                    row['type'] = default_tag_type
            elif field == 'value':
                if audit.is_street_name(child):
                    row[field] = audit.update_name(
                        child.attrib['v'], audit.mapping)
                elif audit.is_up_for_fixing(child):
                    row[field] = audit.fix(child.attrib['v'])
                else:
                    row[field] = child.attrib['v']
        return row

    attrs = element.attrib

    if element.tag == 'node':
        node_attribs = {}
        for field in node_attr_fields:
            if field not in attrs and field == 'user':
                node_attribs[field] = 'NO_USER'
            elif field not in attrs and field == 'uid':
                node_attribs[field] = 0
            else:
                node_attribs[field] = attrs[field]
        # Iterate the element directly: Element.getchildren() no longer
        # exists on Python 3.9+.
        tags = [shape_tag(child, attrs['id'], NODE_TAGS_FIELDS)
                for child in element if child.tag == 'tag']
        return {'node': node_attribs, 'node_tags': tags}

    if element.tag == 'way':
        way_attribs = {}
        for field in way_attr_fields:
            way_attribs[field] = attrs[field]

        tags = []
        way_nodes = []
        position = 0
        for child in element:
            if child.tag == 'tag':
                tags.append(shape_tag(child, attrs['id'], WAY_TAGS_FIELDS))
            elif child.tag == 'nd':
                row = {}
                for field in WAY_NODES_FIELDS:
                    if field == 'id':
                        row[field] = attrs[field]
                    elif field == 'node_id':
                        row[field] = child.attrib['ref']
                    else:
                        # Any remaining column receives the 0-based index
                        # of this nd within the way.
                        row[field] = position
                position += 1
                way_nodes.append(row)
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
def shape_element(element):
    """Shape a "node" or "way" element into a dict; return None otherwise.

    CREATED attributes are grouped under 'created', COORDINATES attributes
    fill 'pos' as [lat, lon], and ``addr``-prefixed tags matching
    ``lower_colon`` are grouped under 'address' (street names are cleaned
    with ``audit.update_name`` and ``audit.update_direction``). A tag whose
    key is already taken in the dict is stored under "tag:<key>" instead.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    for name, value in element.attrib.items():
        if name in CREATED:
            created = node.setdefault('created', {})
            # timestamp is not JSON serializable, so store it as a string
            created[name] = str(value) if name == "timestamp" else value
        elif name in COORDINATES:
            pos = node.setdefault('pos', [None, None])
            if name == 'lat':
                pos[0] = float(value)
            if name == 'lon':
                pos[1] = float(value)
        else:
            node[name] = value

    for tag in element.iter("tag"):
        key = tag.attrib['k']
        if problemchars.search(key):
            continue
        has_addr_prefix = key.startswith('addr')
        if has_addr_prefix and lower_colon.search(key):
            # Tags with a single colon and beginning with addr
            address = node.setdefault('address', {})
            field = key.split(':', 1)[1]
            if audit.is_street_name(tag):
                road = audit.update_name(tag.attrib['v'], audit.mapping_road)
                address[field] = audit.update_direction(
                    road, audit.mapping_directions)
            else:
                address[field] = tag.attrib['v']
        elif not has_addr_prefix:
            # All other tags that don't begin with "addr"
            target = key if key not in node else "tag:" + key
            node[target] = tag.attrib['v']

    # collect node_refs of way elements
    for nd in element.iter("nd"):
        node.setdefault('node_refs', []).append(nd.attrib['ref'])
    return node
def process_key_and_value(key, value):
    """Normalize one tag key fragment and its value.

    The key 'addr' is renamed to 'address'. For the key 'street' the value
    is passed through ``street_name_auditor.update_name`` together with
    ``street_name_mapping``. Every other pair is returned unchanged.

    Args:
        key: Tag key (or key fragment) to normalize.
        value: Value associated with ``key``.

    Returns:
        A ``(key, value)`` tuple.
    """
    if key == 'addr':
        key = 'address'
    elif key == 'street':
        value = street_name_auditor.update_name(value, street_name_mapping)
    return key, value
# First words of an address value that are accepted without any cleaning.
_EXPECTED_FIRST_WORDS = frozenset([
    "Calle", "CALLE", u"Barrio", u"Centro", "Calleja", "Centro Comercial",
    "Avenida", "Plaza", "Camino", "Estacion", "Parking", "Campus",
    "Carretera", "Glorieta", "Paseo", "Rotonda", "Juan", "Gran", "Dante",
    "Maria", "Pasaje", u'Le\xf3n', u'Comisar\xeda', "Edificio", "Vivero",
    "CARRETERA", "Centro", "Lope", u'pol\xedgono', u'Pol\xedgono', "Bajada",
    "Subida", "Grupo", "Rampa", "Barrio", "AREA", "La", "Acceso", "POLIGONO",
    "Mercado", "Cuesta", u"Urbanizaci\xf3n", "Ernest", "Pol", "Puerto",
    "Jardines", "San", u"Autov\xeda", u"V\xeda", "MercaSantander",
    u"Traves\xeda", u"ISLA", u"Playa", "N-611", "BARRIO", "Las",
])

# UPDATE THIS VARIABLE: replacements handed to update_name for street names.
_STREET_NAME_MAPPING = {
    "C/": "Calle",
    "Barrio": "Barrio",
    "Calle": "Calle",
    "Calles": "Calle",
    "Avenidad": "Avenida",
    "Avda.": "Avenida",
    u"Calla": "Calle",
    "name=Avenida": "Avenida",
    "name=Calle": "Calle",
    "AREA,": "Area",
    "Bajade": "Bajada",
    "Ramapa": "Rampa",
}


def shape_element(element):
    """Shape a "node" or "way" element into a nested dict.

    Attributes listed in CREATED are grouped under ``node["created"]``,
    ``lat``/``lon`` become the float pair ``node["pos"]`` (``[lat, lon]``,
    missing parts default to 0.0) and all other attributes are copied to
    the top level.

    Child ``tag`` elements whose key matches ``problemchars`` are skipped.
    Keys starting with "addr:" are stored under ``node["address"]`` unless
    the remainder of the key matches ``lower_colon``. A value whose first
    space-separated word is a known prefix is stored untouched; otherwise
    street, postcode and housenumber values are passed through
    ``update_name``, ``update_postcode`` and ``update_housenumber``.
    Every other tag is copied to the top level under its own key.

    The ``ref`` of each ``nd`` child is appended to ``node["node_refs"]``.

    Returns:
        The shaped dict, or None for any other element type.
    """
    # Only two kinds of top-level elements are processed.
    if element.tag not in ("node", "way"):
        return None

    # Assigned once up front so that elements without attributes are typed
    # as well.
    node = {"type": element.tag}

    for key, val in element.attrib.items():
        if key in CREATED:
            node.setdefault("created", {})[key] = val
        elif key in ("lat", "lon"):
            pos = node.setdefault("pos", [0.0, 0.0])
            pos[0 if key == "lat" else 1] = float(val)
        else:
            node[key] = val

    for tag in element.iter("tag"):
        tag_key = tag.attrib['k']
        tag_val = tag.attrib['v']
        if problemchars.match(tag_key):
            continue
        if not tag_key.startswith("addr:"):
            node[tag_key] = tag_val
            continue

        # The address dict is created even if this particular key is then
        # skipped, matching the long-standing output shape.
        address = node.setdefault("address", {})
        addr_key = tag_key[len("addr:"):]
        if lower_colon.match(addr_key):
            continue

        if tag_val.split(' ')[0] in _EXPECTED_FIRST_WORDS:
            address[addr_key] = tag_val
        elif tag_key.endswith("street"):
            address[addr_key] = update_name(tag_val, _STREET_NAME_MAPPING)
        elif tag_key.endswith("postcode"):
            address[addr_key] = update_postcode(tag_val)
        elif tag_key.endswith("housenumber"):
            address[addr_key] = update_housenumber(tag_val)
        else:
            address[addr_key] = tag_val

    for nd in element.iter("nd"):
        node.setdefault("node_refs", []).append(nd.attrib["ref"])

    return node
def shape_element(element):
    """Shape a "node" or "way" element into a nested dict and clear it.

    Attributes listed in CREATED go into ``node['created']``; a
    ``timestamp`` attribute additionally contributes ``year`` (characters
    0-3) and ``month`` (characters 5-6). ``lat``/``lon`` become the float
    pair ``node['pos']``; other attributes are copied to the top level.
    ``created`` and ``pos`` are always present in the result.

    For child ``tag`` elements, two-part "addr:" keys are handled as
    follows: ``addr:street`` is stored under ``node['address']`` after
    ``audit.update_name``; ``addr:postcode``/``addr:zipcode`` after
    ``audit.update_zipcode``. Other "addr:" keys are not stored. Non-address
    tags are copied to the top level unless ``problemchars`` finds a match
    in their value. ``nd`` references are stored in ``node['node_refs']``
    for ways that have at least one.

    ``element.clear()`` is called before returning.

    Returns:
        The shaped dict, or None for any other element type.
    """
    if element.tag != "node" and element.tag != "way":
        return None

    node = {'type': element.tag}
    created = {}
    address = {}
    position = []

    for name in element.attrib:
        if name == 'timestamp':
            stamp = element.attrib[name]
            created['year'] = stamp[:4]
            created['month'] = stamp[5:7]
        if name in CREATED:
            created[name] = element.attrib[name]
        elif name == 'lat':
            # lat drives the pair; lon is read here and skipped below.
            position.append(float(element.attrib[name]))
            position.append(float(element.attrib['lon']))
        elif name != 'lon':
            node[name] = element.attrib[name]

    node['created'] = created
    node['pos'] = position

    refs = []
    for child in element:
        fields = child.attrib
        if child.tag == 'nd':
            refs.append(fields['ref'])
            continue
        if child.tag != 'tag':
            continue

        key = fields['k']
        if not key.startswith('addr:'):
            # Plain tag: keep it unless the value looks problematic.
            if not problemchars.search(fields['v']):
                node[key] = fields['v']
            continue

        parts = key.split(':')
        if len(parts) != 2:
            continue
        if key == 'addr:street':
            address[parts[1]] = audit.update_name(fields['v'], audit.mapping)
        if key == 'addr:postcode' or key == 'addr:zipcode':
            address[parts[1]] = audit.update_zipcode(fields['v'])

    if element.tag == 'way' and refs:
        node['node_refs'] = refs
    if address:
        node['address'] = address

    element.clear()
    return node
def _shape_street_postcode_tags(element, attribs, problem_chars,
                                default_tag_type):
    """Build the tag dicts of *element*; None if a key has problem chars."""
    shaped = []
    for elem in element.iter('tag'):
        k = elem.get('k')
        v = elem.get('v')
        tag = {'id': attribs['id'], 'value': v}
        if problem_chars.search(k):
            # NOTE(review): a single problematic key discards the whole
            # element, not just this tag - confirm this is intended.
            return None
        if ':' in k:
            if k == 'addr:street':
                if v is None:
                    continue
                # Keep the cleaned name; calling update_name without
                # storing its result leaves the raw value in place.
                tag['value'] = update_name(v, mapping)
            elif k == 'addr:postcode' and (v is None or len(v) != 6):
                # Only six-character postcodes are kept.
                continue
            tag['type'], tag['key'] = k.split(':', 1)
        else:
            tag['key'] = k
            tag['type'] = default_tag_type
        shaped.append(tag)
    return shaped


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Secondary tags are handled the same way for both node and way elements:
    keys containing ":" are split at the first colon into type and key,
    other keys get ``default_tag_type``. ``addr:street`` values are replaced
    by the result of ``update_name(v, mapping)`` (tags without a value are
    skipped) and ``addr:postcode`` tags whose value is not six characters
    long are skipped.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names copied from a node element.
        way_attr_fields: Attribute names copied from a way element.
        problem_chars: Compiled regex; if it finds a match in any tag key
            the function returns None.
        default_tag_type: Tag type used for keys without a colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for nodes,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for ways, or
        None for other elements and for elements with a problematic key.

    Raises:
        KeyError: If a requested attribute is missing from the element.
    """
    if element.tag == 'node':
        node_attribs = {}
        for field in node_attr_fields:
            node_attribs[field] = element.attrib[field]
        tags = _shape_street_postcode_tags(element, node_attribs,
                                           problem_chars, default_tag_type)
        if tags is None:
            return None
        return {'node': node_attribs, 'node_tags': tags}

    if element.tag == 'way':
        way_attribs = {}
        for field in way_attr_fields:
            way_attribs[field] = element.attrib[field]
        way_nodes = [
            {'id': way_attribs['id'],
             'node_id': nd.get('ref'),
             'position': index}
            for index, nd in enumerate(element.iter('nd'))
        ]
        tags = _shape_street_postcode_tags(element, way_attribs,
                                           problem_chars, default_tag_type)
        if tags is None:
            return None
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
def _shape_state_aware_tag(element, child, problem_chars, default_tag_type):
    """Return the dict for one ``tag`` child, or None if it must be skipped."""
    key = child.attrib['k']
    if problem_chars.match(key):
        return None

    if not LOWER_COLON.match(key):
        return {
            'type': default_tag_type,
            'key': key,
            'id': element.attrib['id'],
            'value': child.attrib['v'],
        }

    pieces = key.split(':', 1)
    value = child.attrib['v']
    if key == 'addr:street':
        value = update_name(value)
    elif key == 'addr:state':
        # A falsy result means the state name could not be cleaned, so the
        # tag is dropped.
        value = updat_state_name(value, stat_mapping)
        if not value:
            return None
    return {
        'type': pieces[0],
        'key': pieces[1],
        'id': element.attrib['id'],
        'value': value,
    }


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Secondary tags are handled the same way for both node and way elements:
    keys matching ``problem_chars`` are skipped, keys matching LOWER_COLON
    are split at the first colon into type and key, and all other keys get
    ``default_tag_type``. ``addr:street`` values go through ``update_name``
    and ``addr:state`` values through ``updat_state_name``; a state tag
    whose cleaned value is falsy is dropped.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names kept for a node element.
        way_attr_fields: Attribute names kept for a way element.
        problem_chars: Compiled regex matched against each tag key.
        default_tag_type: Tag type used for keys without a colon prefix.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for nodes,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for ways (node
        positions start at 0), or None for any other element.
    """
    tags = []

    if element.tag == 'node':
        node_attribs = {}
        for attrib in element.attrib:
            if attrib in node_attr_fields:
                node_attribs[attrib] = element.attrib[attrib]
        for child in element:
            if child.tag != 'tag':
                continue
            shaped = _shape_state_aware_tag(element, child, problem_chars,
                                            default_tag_type)
            if shaped is not None:
                tags.append(shaped)
        return {'node': node_attribs, 'node_tags': tags}

    if element.tag == 'way':
        way_attribs = {}
        for attrib in element.attrib:
            if attrib in way_attr_fields:
                way_attribs[attrib] = element.attrib[attrib]
        way_nodes = []
        position = 0
        for child in element:
            if child.tag == 'tag':
                shaped = _shape_state_aware_tag(element, child,
                                                problem_chars,
                                                default_tag_type)
                if shaped is not None:
                    tags.append(shaped)
            elif child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': position,
                })
                position += 1
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Secondary tags are handled the same way for both node and way elements.
    A key matching LOWER_COLON is split at the first colon into type and
    key; ``addr:street`` values are replaced by
    ``audit.update_name(value, audit.mapping)`` and ``addr:postcode``
    values by ``audit.update_postcode(value)``. Otherwise a key matching
    ``problem_chars`` is skipped, and any remaining key is stored with
    ``default_tag_type``.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names kept for a node element.
        way_attr_fields: Attribute names kept for a way element.
        problem_chars: Compiled regex matched against tag keys.
        default_tag_type: Tag type used for keys without a colon prefix.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for nodes,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for ways (node
        positions start at 1), or None for any other element.
    """
    is_node = element.tag == 'node'
    if not is_node and element.tag != 'way':
        return None

    wanted = node_attr_fields if is_node else way_attr_fields
    attribs = {}
    for attribute in element.attrib:
        if attribute in wanted:
            attribs[attribute] = element.attrib[attribute]

    tags = []
    way_nodes = []
    nd_pos = 1  # way node positions are 1-based in this shaper
    for child in element:
        if not is_node:
            if child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': nd_pos,
                })
                nd_pos += 1
                continue
            if child.tag != 'tag':
                continue

        key = child.attrib['k']
        if LOWER_COLON.match(key):
            pieces = key.split(':', 1)
            if key == "addr:street":
                value = audit.update_name(child.attrib['v'], audit.mapping)
            elif key == "addr:postcode":
                value = audit.update_postcode(child.attrib['v'])
            else:
                value = child.attrib['v']
            tag_key, tag_type = pieces[1], pieces[0]
        elif problem_chars.match(key):
            continue
        else:
            tag_key, tag_type = key, default_tag_type
            value = child.attrib['v']

        tags.append({
            'id': element.attrib['id'],
            'key': tag_key,
            'value': value,
            'type': tag_type,
        })

    if is_node:
        return {'node': attribs, 'node_tags': tags}
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def process_key_and_value(key, value):
    """Normalize one tag key fragment and its value.

    'addr' is renamed to 'address'; the value of a 'street' key is passed
    through ``street_name_auditor.update_name`` with
    ``street_name_mapping``; anything else is returned as given.

    Returns:
        A ``(key, value)`` tuple.
    """
    if key == 'addr':
        return 'address', value
    if key == 'street':
        cleaned = street_name_auditor.update_name(value, street_name_mapping)
        return key, cleaned
    return key, value
def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Secondary tags are handled the same way for both node and way elements.
    Tags whose key matches ``problem_chars`` are skipped. The stored value
    is the raw ``v`` attribute unless the key contains 'street'
    (``update_name``), 'postcode' (``update_postcode``) or 'city'
    (``update_city``); when several apply, the later one in that order
    wins. Keys containing ":" are split at the first colon into type and
    key; other keys get ``default_tag_type``.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names copied from a node element.
        way_attr_fields: Attribute names copied from a way element.
        problem_chars: Compiled regex matched against tag keys.
        default_tag_type: Tag type used for keys without a colon.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for nodes,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for ways (node
        positions start at 0), or None for any other element.

    Raises:
        KeyError: If a requested attribute is missing from the element.
    """
    if element.tag not in ('node', 'way'):
        return None

    node_attribs = {}
    way_attribs = {}
    way_nodes = []
    tags = []

    # Attributes of the parent element.
    if element.tag == 'node':
        for item in node_attr_fields:
            node_attribs[item] = element.attrib[item]
    else:
        for item in way_attr_fields:
            way_attribs[item] = element.attrib[item]

    position = 0
    for child in element:
        element_id = element.attrib['id']

        if child.tag == 'tag':
            key = child.attrib['k']
            if problem_chars.match(key):
                continue

            raw = child.attrib['v']
            # Start from the raw value and let each applicable cleaner
            # replace it, so a cleaned value is never reset to the raw one.
            value = raw
            if 'street' in key:
                value = update_name(raw, mapping)
            if 'postcode' in key:
                value = update_postcode(raw)
            if 'city' in key:
                value = update_city(raw, city_mapping)

            fields = {'id': element_id, 'value': value}
            if ':' in key:
                tag_type, _, tag_key = key.partition(':')
                fields['type'] = tag_type
                fields['key'] = tag_key
            else:
                fields['key'] = key
                fields['type'] = default_tag_type
            tags.append(fields)

        if child.tag == 'nd':
            way_nodes.append({
                'id': element_id,
                'node_id': child.attrib['ref'],
                'position': position,
            })
            position += 1

    if element.tag == 'node':
        return {'node': node_attribs, 'node_tags': tags}
    return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def _clean_secondary_tag(element, child, problem_chars, default_tag_type):
    """Return the dict for one ``tag`` child, or None if it is skipped."""
    key = child.attrib['k']

    if LOWER_COLON.match(key):
        pieces = key.split(':', 1)
        if key == "addr:street":
            value = audit.update_name(child.attrib['v'], audit.mapping)
        elif key == "addr:postcode":
            value = audit.update_postcode(child.attrib['v'])
        else:
            value = child.attrib['v']
        return {
            'id': element.attrib['id'],
            'key': pieces[1],
            'value': value,
            'type': pieces[0],
        }

    if problem_chars.match(key):
        return None

    return {
        'id': element.attrib['id'],
        'key': key,
        'value': child.attrib['v'],
        'type': default_tag_type,
    }


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Secondary tags are handled the same way for both node and way elements.
    A key matching LOWER_COLON is split at the first colon into type and
    key, with ``addr:street`` and ``addr:postcode`` values replaced by the
    results of ``audit.update_name`` and ``audit.update_postcode``.
    Otherwise a key matching ``problem_chars`` is skipped and any remaining
    key is stored with ``default_tag_type``.

    Args:
        element: The XML element to shape.
        node_attr_fields: Attribute names kept for a node element.
        way_attr_fields: Attribute names kept for a way element.
        problem_chars: Compiled regex matched against tag keys.
        default_tag_type: Tag type used for keys without a colon prefix.

    Returns:
        ``{'node': ..., 'node_tags': ...}`` for nodes,
        ``{'way': ..., 'way_nodes': ..., 'way_tags': ...}`` for ways (node
        positions start at 1), or None for any other element.
    """
    tags = []

    # NODE element
    if element.tag == 'node':
        node_attribs = {}
        for attribute in element.attrib:
            if attribute in node_attr_fields:
                node_attribs[attribute] = element.attrib[attribute]
        for child in element:
            shaped = _clean_secondary_tag(element, child, problem_chars,
                                          default_tag_type)
            if shaped is not None:
                tags.append(shaped)
        return {'node': node_attribs, 'node_tags': tags}

    # WAY element
    if element.tag == 'way':
        way_attribs = {}
        for attribute in element.attrib:
            if attribute in way_attr_fields:
                way_attribs[attribute] = element.attrib[attribute]
        way_nodes = []
        nd_pos = 1  # way node positions are 1-based in this shaper
        for child in element:
            if child.tag == 'nd':
                way_nodes.append({
                    'id': element.attrib['id'],
                    'node_id': child.attrib['ref'],
                    'position': nd_pos,
                })
                nd_pos += 1
            elif child.tag == 'tag':
                shaped = _clean_secondary_tag(element, child, problem_chars,
                                              default_tag_type)
                if shaped is not None:
                    tags.append(shaped)
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
def shape_element(element):
    """Shape a "node" or "way" element into a nested dict.

    ``lat``/``lon`` become the float pair ``node['pos']`` when both are
    present and numeric. Attributes listed in CREATED are grouped under
    ``node['created']``; other attributes are copied to the top level.

    Child ``tag`` elements are dispatched on their key:

    * keys in which ``problemchars`` finds a match are skipped;
    * "addr:" keys go under ``node['address']`` (skipped when the part
      after "addr:" matches ``lower_colon``); street names are cleaned
      with ``audit.update_name`` and zip codes with ``clean_zip``;
    * keys starting with "is_in" go under ``node['is_in']``: a plain
      ``is_in`` value is mapped to city/country/state entries through
      ``diego_re``, ``usa_re`` and ``ca_re`` (falling back to the raw value
      as city when nothing has been recorded yet), and ``lower_colon`` keys
      are delegated to ``clean_is_in``;
    * tags accepted by ``audit.is_gnis`` go under ``node['gnis']`` via
      ``clean_gnis``;
    * a ``type`` tag is stored as ``tag_type`` so it cannot overwrite the
      element type; everything else is copied under its own key.

    Ways always get a ``node_refs`` list with the ``ref`` of each ``nd``.

    Returns:
        The shaped dict, or None for any other element type.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}
    created = {}

    # Position from lat/lon; elements without usable coordinates simply
    # get no 'pos' entry.
    try:
        node['pos'] = [float(element.attrib['lat']),
                       float(element.attrib['lon'])]
    except (KeyError, ValueError):
        pass

    # Attributes of the node/way element itself.
    for k, v in element.attrib.items():
        if k in CREATED:
            # creation data lives in a sub-dictionary
            created[k] = v
            node['created'] = created
        elif k in ('lat', 'lon'):
            # already processed above
            continue
        else:
            node[k] = v

    # Child 'tag' elements.
    for tag in element.iter('tag'):
        tag_key = tag.attrib['k']

        if problemchars.search(tag_key):
            continue

        if tag_key.startswith('addr:'):
            address = node.setdefault('address', {})
            sub_key = tag_key[5:]
            if lower_colon.search(sub_key):
                continue
            if audit.is_street_name(tag):
                address[sub_key] = audit.update_name(tag.attrib['v'])
            elif audit.is_zip(tag):
                address[sub_key] = clean_zip(tag.attrib['v'])
            else:
                address[sub_key] = tag.attrib['v']

        elif tag_key.startswith('is_in'):
            is_in = node.setdefault('is_in', {})
            if tag_key == 'is_in':
                # Split 'is_in' list values into appropriate keys.
                raw = tag.attrib['v']
                if diego_re.search(raw):
                    is_in['city'] = 'San Diego'
                if usa_re.search(raw):
                    is_in['country'] = 'United States of America'
                    is_in['country_code'] = 'US'
                if ca_re.search(raw):
                    is_in['state'] = 'California'
                    is_in['state_code'] = 'CA'
                if is_in == {}:
                    # catch for baja and lakeside
                    is_in['city'] = raw
            elif lower_colon.search(tag_key):
                sub_key, sub_val = clean_is_in(tag)
                is_in[sub_key] = sub_val

        elif audit.is_gnis(tag):
            # GNIS data, including the ele tag.
            gnis = node.setdefault('gnis', {})
            gnis_key, gnis_val = clean_gnis(tag)
            gnis[gnis_key] = gnis_val

        elif tag_key == 'type':
            # Keep the element type set above from being overwritten.
            node['tag_type'] = tag.attrib['v']

        else:
            node[tag_key] = tag.attrib['v']

    # node_refs for ways
    if element.tag == "way":
        node['node_refs'] = [nd.attrib['ref'] for nd in element.iter('nd')]

    return node
def shape_element(element):
    """Shape a "node", "way" or "relation" element into a nested dict.

    The result carries ``type``, ``id`` and (when present) ``visible``.
    Every attribute named in CREATED is copied into ``node['created']``,
    whose ``timestamp`` entry is then replaced by the value parsed with
    ``dateutil.parser.parse``. ``node['pos']`` is ``[lat, lon]`` as floats
    when both attributes exist.

    Child ``tag`` elements whose key matches ``problemchars`` are skipped.
    Keys matching ``lower`` are copied to the top level; a ``type`` key is
    stored as ``<element tag>_type`` instead. Keys matching ``lower_colon``
    are copied to the top level unless they start with "addr:", in which
    case two-part keys are stored under ``node['address']`` (street names
    cleaned with ``audit.update_name``). ``nd`` references and ``member``
    children are collected in ``node_refs`` and ``members``; these and
    ``address`` are only added when non-empty.

    Returns:
        The shaped dict, or None for any other element type.

    Raises:
        KeyError: If ``id``, ``timestamp`` or an attribute named in CREATED
            is missing.
    """
    # Only the three basic top-level elements are shaped.
    if element.tag not in ('node', 'way', 'relation'):
        return None

    attrs = element.attrib
    node = {"type": element.tag, "id": attrs["id"]}
    if "visible" in attrs:
        node["visible"] = attrs["visible"]

    # 'created' sub-document, with the timestamp as a datetime object.
    created = {}
    for name in CREATED:
        created[name] = attrs[name]
    created['timestamp'] = dateutil.parser.parse(attrs['timestamp'])
    node["created"] = created

    if "lat" in attrs and "lon" in attrs:
        node["pos"] = [float(attrs["lat"]), float(attrs["lon"])]

    node_refs = []
    address = {}
    members = []
    for child in element:
        if child.tag == "tag":
            k = child.attrib['k']
            if re.search(problemchars, k) is not None:
                continue

            if re.search(lower, k) is not None:
                # A tag called 'type' must not overwrite the element type.
                target = element.tag + '_type' if k == 'type' else k
                node[target] = child.attrib['v']

            if re.search(lower_colon, k) is not None:
                if not k.startswith("addr:"):
                    node[k] = child.attrib['v']
                else:
                    parts = k.split(":")
                    if len(parts) == 2:
                        v = child.attrib['v']
                        if k == "addr:street":
                            v = audit.update_name(v, audit.mapping)
                        address[parts[1]] = v
        elif child.tag == "nd":
            # for 'way'
            node_refs.append(child.attrib["ref"])
        elif child.tag == 'member':
            # for 'relation'
            members.append({
                'ref': child.attrib['ref'],
                'role': child.attrib['role'],
                'type': child.attrib['type'],
            })

    if node_refs:
        node["node_refs"] = node_refs
    if address:
        node["address"] = address
    if members:
        node["members"] = members
    return node