Example #1
0
def load_new_tag(element, secondary, default_tag_type):
    """
    Build the tag dict for one secondary element of a node or way.

    The result carries four entries: 'id' (the parent element's id), 'key'
    and 'type' (split from the 'k' attribute at its first colon, or the whole
    'k' with default_tag_type when there is no colon) and 'value'.

    The value is the raw 'v' attribute unless a cleaner applies: street names
    go through update_street, and 'phone' / 'postcode' keys go through
    updating_phone / updating_postal, whose result is used only when it is
    not None.
    """
    new = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    prefix, colon, remainder = full_key.partition(":")
    if colon:
        # Text after the first colon is the key, text before it is the type
        new['key'] = remainder
        new['type'] = prefix
    else:
        new['key'] = full_key
        new['type'] = default_tag_type

    raw_value = secondary.attrib['v']
    new['value'] = raw_value

    # Street names are always replaced by their cleaned form
    if is_street_name(secondary):
        new['value'] = update_street(raw_value)

    # Phone numbers and postal codes are replaced only when the cleaner
    # returns something; a None result leaves the current value untouched
    cleaned = None
    if new['key'] == 'phone':
        cleaned = updating_phone(raw_value)
    elif new['key'] == 'postcode':
        cleaned = updating_postal(raw_value)
    if cleaned is not None:
        new['value'] = cleaned

    return new
Example #2
0
def load_new_tag(element, secondary, default_tag_type):
    """
    Build one entry for the way_tags / node_tags lists.

    'id' comes from the parent element. 'key' and 'type' come from the 'k'
    attribute: split at the first colon when there is one, otherwise the
    whole 'k' is the key and default_tag_type is the type. 'value' is the
    'v' attribute, except that street names are passed through update_name
    (with the module-level mapping) and a 'province' of 'Texas' becomes 'TX'.
    """
    new = {'id': element.attrib['id']}

    full_key = secondary.attrib['k']
    if ":" in full_key:
        tag_type, _, tag_key = full_key.partition(":")
        new['key'] = tag_key
        new['type'] = tag_type
    else:
        new['key'] = full_key
        new['type'] = default_tag_type

    # Street names take precedence over every other kind of cleaning
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'], mapping)
        return new

    raw_value = secondary.attrib['v']
    # Abbreviate the one spelled-out province name this cleaner handles
    if new['key'] == 'province' and raw_value == 'Texas':
        raw_value = 'TX'
    new['value'] = raw_value
    return new
Example #3
0
def add_tag(elem, elem_2, default_tag_type):
    """
    Build the tag dict for one secondary element (elem_2) of elem.

    Args:
        elem: parent element; its 'id' attribute becomes the tag's 'id'.
        elem_2: child element carrying the 'k' and 'v' attributes.
        default_tag_type: 'type' used when 'k' contains no colon.

    Returns:
        dict with 'id', 'key', 'type' and 'value'. The value is the 'v'
        attribute, passed through update_name / update_postcode /
        update_phone when is_street_name / is_postcode / is_phone
        recognises the element.
    """
    new_tag = {'id': elem.attrib['id']}

    raw_key = elem_2.attrib['k']
    raw_value = elem_2.attrib['v']

    # Split "type:key" at the first colon; keys without one use the default
    tag_type, colon, tag_key = raw_key.partition(":")
    if colon:
        new_tag['key'] = tag_key
        new_tag['type'] = tag_type
    else:
        new_tag['key'] = raw_key
        new_tag['type'] = default_tag_type

    if is_street_name(elem_2):
        # Clean the street name itself (the 'v' attribute), not the tag key
        new_tag['value'] = update_name(raw_value, mapping)
    elif is_postcode(elem_2):
        new_tag['value'] = update_postcode(raw_value)
    elif is_phone(elem_2):
        new_tag['value'] = update_phone(raw_value)
    else:
        new_tag['value'] = raw_value

    return new_tag
def tag_attributes(element, default_tag_type, child):
    """
    Build the tag dict for one child element, dropping invalid postal codes.

    Args:
        element: parent element; its 'id' attribute becomes the tag's 'id'.
        default_tag_type: 'type' used when the child's 'k' has no colon.
        child: element carrying the 'k' and 'v' attributes.

    Returns:
        dict with 'id', 'key', 'type' and 'value', or None when the key is
        'postcode' and the value either fails POST_CODE or is not exactly
        six characters long.
    """
    tg = {'id': element.attrib['id']}

    raw_key = child.attrib['k']
    tag_type, colon, tag_key = raw_key.partition(':')
    if colon:
        tg['key'] = tag_key
        tg['type'] = tag_type
    else:
        tg['key'] = raw_key
        tg['type'] = default_tag_type

    if is_street_name(child):
        tg['value'] = update_name(child.attrib['v'])
    elif tg['key'] == 'postcode':
        pin_code = child.attrib['v']
        # Every invalid code is rejected, so the caller never receives a
        # dict that is missing its 'value' entry
        if POST_CODE.match(pin_code) is None or len(pin_code) != 6:
            return None
        tg['value'] = pin_code
    else:
        tg['value'] = child.attrib['v']
    return tg
def load_new_tag(element, secondary, default_tag_type):
    """
    Create a new tag dict to go along with the ways_tags and nodes_tags.

    Args:
        element: parent element; its 'id' attribute becomes the tag's 'id'.
        secondary: child element carrying the 'k' and 'v' attributes.
        default_tag_type: 'type' used when 'k' contains no colon.

    Returns:
        dict with 'id', 'key', 'type' and 'value'. Street names are passed
        through update_name and postal codes through clean_postcode; every
        other value is copied unchanged.
    """
    new = {'id': element.attrib['id']}

    raw_key = secondary.attrib['k']
    tag_type, colon, tag_key = raw_key.partition(":")
    if colon:
        new['key'] = tag_key
        new['type'] = tag_type
    else:
        new['key'] = raw_key
        new['type'] = default_tag_type

    # Cleaning and loading values of various keys
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
    elif new['key'] == 'postcode':
        # Detect postal codes from the already-split key instead of a fixed
        # slice offset, and keep the cleaned value local so the input
        # element is not modified
        new['value'] = clean_postcode(secondary.attrib['v'])
    else:
        new['value'] = secondary.attrib['v']

    return new
Example #6
0
def load_new_tag(element, secondary, default_tag_type):
    """
    Build one entry for the way_tags / node_tags lists.

    Returns a dict with 'id', 'key', 'type' and 'value', or None when the
    key is 'phone' and phone_cleaned returns None for the value.
    """
    new = {'id': element.attrib['id']}

    k_attr = secondary.attrib['k']
    if ":" in k_attr:
        # type is the text before the first colon, key is everything after
        tag_type, _, tag_key = k_attr.partition(":")
    else:
        tag_type, tag_key = default_tag_type, k_attr
    new['key'] = tag_key
    new['type'] = tag_type

    # Street names are checked first and win over the phone rule
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
        return new

    if tag_key == 'phone':
        phone_num = phone_cleaned(secondary.attrib['v'])
        if phone_num is None:
            # No usable phone number: the whole tag is dropped
            return None
        new['value'] = phone_num
        return new

    new['value'] = secondary.attrib['v']
    return new
def shape_element(element):
    """
    Shape a "node" or "way" element into a dict; any other tag yields None.

    The dict holds 'id', 'type', optionally 'visible', a 'created' sub-dict
    with every attribute named in CREATED, optionally 'pos' as [lat, lon]
    floats, an 'address' sub-dict built from "addr:" tags, one top-level
    entry per other tag whose key matches `lower`, and 'node_refs' when the
    element has <nd> children.
    """
    if element.tag not in ("node", "way"):
        return None

    attrs = element.attrib
    node = {'id': attrs['id'], 'type': element.tag}

    if 'visible' in attrs:
        node['visible'] = attrs['visible']

    node['created'] = {field: attrs[field] for field in CREATED}

    if 'lat' in attrs:
        node['pos'] = [float(attrs['lat']), float(attrs['lon'])]

    if element.find("tag") is not None:
        for tag in element.iter("tag"):
            tag_key = tag.attrib['k']
            is_addr = tag_key.startswith("addr:")

            if lower_colon.match(tag_key) and is_addr:
                address = node.setdefault('address', {})
                address[tag_key.split(":")[1]] = tag.attrib['v']

                if audit.is_street_name(tag):
                    street, st_type = audit.update_name(
                        tag.attrib['v'], audit.mapping)

                    # The cleaned street name replaces the raw one
                    address['street'] = street

                    # Only expected street types are recorded for analysis
                    if st_type in audit.expected:
                        address['st_type'] = st_type
            elif lower.match(tag_key) and not is_addr:
                node[tag_key] = tag.attrib['v']

    if element.find("nd") is not None:
        node["node_refs"] = [nd.attrib['ref'] for nd in element.iter("nd")]

    return node
Example #8
0
def load_new_tag(element, secondary, default_tag_type):
    """
    Build one row for the node_tags / way_tags lists from a secondary element.

    Returns a dict with 'id', 'key', 'type' and 'value', or None when the tag
    is to be dropped: a phone number for which update_phone_num returns None,
    or a postal code that neither matches POSTCODE nor starts with "14174".
    """
    new = {'id': element.attrib['id']}

    k_attr = secondary.attrib['k']
    if ":" in k_attr:
        tag_type, _, tag_key = k_attr.partition(":")
    else:
        tag_type, tag_key = default_tag_type, k_attr
    new['key'] = tag_key
    new['type'] = tag_type

    # Street names are handled first and bypass every key-based rule
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
        return new

    if tag_key == 'phone':
        phone_num = update_phone_num(secondary.attrib['v'])
        if phone_num is None:
            return None
        new['value'] = phone_num

    elif tag_key == 'province':
        # Abbreviate Ontario to ON; any other province is kept as written
        province = secondary.attrib['v']
        new['value'] = 'ON' if province == 'Ontario' else province

    elif tag_key == 'postcode':
        candidate = secondary.attrib['v'].strip()
        if POSTCODE.match(candidate) is not None:
            # Normalise to "XXX XXX": insert the middle space, upper-case
            if " " not in candidate:
                candidate = candidate[:3] + " " + candidate[3:]
            new['value'] = candidate.upper()
        elif candidate.startswith("14174"):
            # Zip code found during the postal code audit; kept so the
            # document can be located and deleted later
            new['value'] = candidate
        else:
            # Improper postal code format: ignore the tag
            return None

    else:
        new['value'] = secondary.attrib['v']

    return new
Example #9
0
def load_new_tag(element, secondary, default_tag_type):
    """
    Turn a secondary element into a dict for the way_tags / node_tags lists.

    The dict has 'id', 'key', 'type' and 'value'. None is returned instead
    when update_phone_num yields None for a 'phone' value, or when a
    'postcode' value fails POSTCODE and does not begin with "14174".
    """
    new = {'id': element.attrib['id']}

    raw_key = secondary.attrib['k']
    head, colon, tail = raw_key.partition(":")
    if colon:
        new['key'] = tail
        new['type'] = head
    else:
        new['key'] = raw_key
        new['type'] = default_tag_type
    key = new['key']

    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
        return new

    if key == 'phone':
        cleaned_phone = update_phone_num(secondary.attrib['v'])
        if cleaned_phone is None:
            return None
        new['value'] = cleaned_phone
        return new

    if key == 'province':
        # Ontario is stored by its abbreviation
        raw_province = secondary.attrib['v']
        if raw_province == 'Ontario':
            raw_province = 'ON'
        new['value'] = raw_province
        return new

    if key == 'postcode':
        code = secondary.attrib['v'].strip()
        if POSTCODE.match(code) is None:
            # The "14174" zip code surfaced in the postal code audit and is
            # kept for document deletion; anything else malformed is dropped
            if code[:5] != "14174":
                return None
            new['value'] = code
            return new
        if " " not in code:
            # Add the space in the middle when it is missing
            code = code[:3] + " " + code[3:]
        new['value'] = code.upper()
        return new

    new['value'] = secondary.attrib['v']
    return new
def shape_element(element):
    """
    Convert a "node" or "way" element into a dict (None for other elements).

    Keys written: 'id', 'type', 'visible' (if present), 'created' (the
    attributes listed in CREATED), 'pos' ([lat, lon] as floats, if 'lat' is
    present), 'address' (from "addr:" tags), one entry per remaining tag
    whose key matches `lower`, and 'node_refs' (refs of <nd> children).
    """
    is_shapeable = element.tag == "node" or element.tag == "way"
    if not is_shapeable:
        return None

    node = {}
    node['id'] = element.attrib['id']
    node['type'] = element.tag

    if 'visible' in element.attrib:
        node['visible'] = element.attrib['visible']

    created = {}
    for name in CREATED:
        created[name] = element.attrib[name]
    node['created'] = created

    if 'lat' in element.attrib:
        latitude = float(element.attrib['lat'])
        longitude = float(element.attrib['lon'])
        node['pos'] = [latitude, longitude]

    has_tags = element.find("tag") is not None
    if has_tags:
        for tag in element.iter("tag"):
            k = tag.attrib['k']
            addr_prefixed = k.startswith("addr:")

            if lower_colon.match(k) and addr_prefixed:
                node.setdefault('address', {})
                node['address'][k.split(":")[1]] = tag.attrib['v']

                if not audit.is_street_name(tag):
                    continue

                cleaned, st_type = audit.update_name(tag.attrib['v'],
                                                     audit.mapping)
                # Overwrite the raw street with its cleaned form
                node['address']['street'] = cleaned
                # Street types outside the expected set are not recorded
                if st_type in audit.expected:
                    node['address']['st_type'] = st_type
            elif lower.match(k) and not addr_prefixed:
                node[k] = tag.attrib['v']

    has_refs = element.find("nd") is not None
    if has_refs:
        node["node_refs"] = [nd.attrib['ref'] for nd in element.iter("nd")]

    return node
Example #11
0
def load_new_tag(element, secondary, default_tag_type):
    """
    Build one entry for the way_tags / node_tags lists.

    Returns a dict with 'id', 'key', 'type' and 'value'. Street names are
    passed through update_name, 'city' values through update_city and
    'postcode' values through update_postcode. A 'phone' value goes through
    update_phone_num, and the function returns None when that yields None.
    """
    new = {'id': element.attrib['id']}

    k_attr = secondary.attrib['k']
    if ":" in k_attr:
        tag_type, _, tag_key = k_attr.partition(":")
    else:
        tag_type, tag_key = default_tag_type, k_attr
    new['key'] = tag_key
    new['type'] = tag_type

    # Street names are recognised first, before any key-based rule
    if is_street_name(secondary):
        new['value'] = update_name(secondary.attrib['v'])
        return new

    raw_value = secondary.attrib['v']

    if tag_key == 'phone':
        phone_num = update_phone_num(raw_value)
        if phone_num is None:
            # Nothing usable came back: the tag is dropped
            return None
        new['value'] = phone_num
    elif tag_key == 'city':
        new['value'] = update_city(raw_value)
    elif tag_key == 'postcode':
        new['value'] = update_postcode(raw_value)
    else:
        new['value'] = raw_value

    return new
Example #12
0
def audit2(osmfile):
    """
    Collect street types from an OSM XML file and count the names per type.

    Args:
        osmfile: path of the OSM XML file to read.

    Returns:
        (street_types, type_count): street_types is the defaultdict(set)
        filled by project_audit.audit_street_type for every street-name tag
        found under a node or way; type_count maps each key of street_types
        to the size of its set.
    """
    street_types = defaultdict(set)

    # The context manager closes the file even if parsing raises
    with open(osmfile, "r") as osm_file:
        # "end" events are used because iterparse only guarantees that an
        # element's children are present once the element has been closed
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag not in ("node", "way"):
                continue
            for tag in elem.iter("tag"):
                if project_audit.is_street_name(tag):
                    project_audit.audit_street_type(street_types, tag.attrib['v'])

    type_count = {
        street_type: len(names) for street_type, names in street_types.items()
    }

    return street_types, type_count
def process_children(node, element):
    """ Helper function that builds the JSON inside the node object parameter.
        Address data and node references are read from the children of the
        element parameter.

        node is modified in place: 'address' is set when any address field
        was collected, 'node_refs' when any <nd> child was seen, and every
        non-address tag is stored under its own key (passed through
        remove_dots when the key contains a '.'). Nothing is returned.
    """
    # address dictionary to be inserted
    address = {}
    node_refs = []

    # iterate through every child of the passed element
    for child in element:
        if child.tag == 'tag':
            for attrib in child.attrib:
                # an attribute value listed in ADDRESS marks an address tag
                if child.attrib[attrib] in ADDRESS:
                    field = child.attrib['k'][5:]
                    # Street names are cleaned for consistency. The call is
                    # guarded by is_street_name(child) alone: testing the
                    # function object for membership in mapping was always
                    # false, so no street name was ever updated.
                    if is_street_name(child):
                        address[field] = update_name(child.attrib['v'], mapping)
                    else:
                        address[field] = child.attrib['v']
                # otherwise store the tag on the node under its own key
                elif not child.attrib['k'].startswith(ADDRESS_PREFIX):
                    key = child.attrib['k']
                    if '.' in key:
                        key = remove_dots(key)
                    node[key] = child.attrib['v']
        elif child.tag == 'nd':
            # every attribute value of an <nd> child is recorded as a ref
            node_refs.extend(child.attrib.values())

    # insert node refs when applicable
    if node_refs:
        node['node_refs'] = node_refs
    # insert address when applicable
    if address:
        node['address'] = address
Example #14
0
def shape_element(element):
    """
    Shape a "node" or "way" element into a dict; return None otherwise.

    The dict contains:
      * 'type': the element tag;
      * 'pos': [lat, lon] as floats, when both attributes parse;
      * 'created': the attributes whose names are in CREATED;
      * every other attribute (except lat/lon) under its own name;
      * 'address': values of "addr:" tags (street names and zips cleaned);
      * 'is_in': location details derived from "is_in" tags;
      * 'gnis': key/value pairs produced by clean_gnis for GNIS tags;
      * 'tag_type': the value of a tag whose key is literally 'type', so it
        does not overwrite the element 'type';
      * every remaining tag under its own key;
      * 'node_refs': refs of the <nd> children (ways only).

    Tags whose key matches problemchars are skipped.
    """
    if element.tag not in ("node", "way"):
        return None  # relations and other elements are not shaped

    node = {'type': element.tag}
    created = {}

    # create position key from lat lon data; a missing or malformed
    # coordinate simply leaves 'pos' unset
    try:
        node['pos'] = [float(element.attrib['lat']), float(element.attrib['lon'])]
    except (KeyError, ValueError):
        pass

    # Process attributes of node/way tag
    for k, v in element.attrib.items():
        if k in CREATED:
            # creation data goes into a sub-dictionary
            created[k] = v
            node['created'] = created
        elif k in ('lat', 'lon'):
            # already processed above
            continue
        else:
            node[k] = v

    # Process child 'tag' tags
    for tag in element.iter('tag'):
        key = tag.attrib['k']

        # skip tags where the key contains problem characters
        if problemchars.search(key):
            continue

        if key.startswith('addr:'):
            address = node.setdefault('address', {})
            sub_key = key[5:]
            # nested address keys (a further colon segment) are ignored
            if lower_colon.search(sub_key):
                continue

            # street names and zips are cleaned before being stored
            if audit.is_street_name(tag):
                address[sub_key] = audit.update_name(tag.attrib['v'])
            elif audit.is_zip(tag):
                address[sub_key] = clean_zip(tag.attrib['v'])
            else:
                address[sub_key] = tag.attrib['v']

        elif key.startswith('is_in'):
            is_in = node.setdefault('is_in', {})

            if key == 'is_in':
                # split the 'is_in' list value into the appropriate keys
                value = tag.attrib['v']
                if diego_re.search(value):
                    is_in['city'] = 'San Diego'
                if usa_re.search(value):
                    is_in['country'] = 'United States of America'
                    is_in['country_code'] = 'US'
                if ca_re.search(value):
                    is_in['state'] = 'California'
                    is_in['state_code'] = 'CA'
                if not is_in:
                    # nothing recognised: keep the raw value as the city
                    is_in['city'] = value
            elif lower_colon.search(key):
                sub_key, sub_val = clean_is_in(tag)
                is_in[sub_key] = sub_val

        elif audit.is_gnis(tag):
            # GNIS data - including ele tag
            gnis_key, gnis_val = clean_gnis(tag)
            node.setdefault('gnis', {})[gnis_key] = gnis_val

        elif key == 'type':
            # keep the element 'type' from being overwritten
            node['tag_type'] = tag.attrib['v']
        else:
            node[key] = tag.attrib['v']

    # node_refs for ways
    if element.tag == "way":
        node['node_refs'] = [nd.attrib['ref'] for nd in element.iter('nd')]
    return node
def shape_element(element, node_attr_fields=NODE_FIELDS, way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS, default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: the XML element to shape.
        node_attr_fields: attribute names copied from a node element.
        way_attr_fields: attribute names copied from a way element.
        problem_chars: compiled pattern; a tag is skipped when the pattern
            is found anywhere in its key.
        default_tag_type: 'type' given to tags whose key does not match
            LOWER_COLON.

    Returns:
        {'node': ..., 'node_tags': [...]} for a node,
        {'way': ..., 'way_nodes': [...], 'way_tags': [...]} for a way,
        None for any other element.
    """
    if element.tag not in ('node', 'way'):
        return None

    # The caller-supplied field lists decide which attributes are kept
    wanted = node_attr_fields if element.tag == 'node' else way_attr_fields
    attribs = {k: v for k, v in element.attrib.items() if k in wanted}

    # Secondary tags are handled the same way for both node and way elements
    tags = []
    for tag in element.iter("tag"):
        k = tag.attrib['k']
        v = tag.attrib['v']

        # search() rather than match(): a problem character anywhere in the
        # key disqualifies the tag, not only one in first position
        if problem_chars.search(k):
            continue

        shaped = {'id': element.attrib['id']}

        if LOWER_COLON.match(k):
            k_array = k.split(':', 1)
            shaped['key'] = k_array[1]
            shaped['type'] = k_array[0]
        else:
            shaped['key'] = k
            shaped['type'] = default_tag_type

        # update selected fields based on mappings from audit.py
        if audit.is_street_name(tag):
            shaped['value'] = audit.update_street_type(v)
        elif audit.is_postal_code(tag):
            shaped['value'] = audit.update_postal_code(v)
        elif audit.is_city(tag):
            shaped['value'] = audit.update_city(v)
        elif audit.is_state(tag):
            shaped['value'] = audit.update_state(v)
        else:
            shaped['value'] = v

        tags.append(shaped)

    if element.tag == 'node':
        return {'node': attribs, 'node_tags': tags}

    # Each <nd> child becomes one row, numbered in document order
    way_nodes = [
        {
            'id': element.attrib['id'],
            'node_id': nd.attrib['ref'],
            'position': position,
        }
        for position, nd in enumerate(element.iter('nd'))
    ]
    return {'way': attribs, 'way_nodes': way_nodes, 'way_tags': tags}
def _shape_child_tag(child, parent_atts, tag_fields, default_tag_type):
    """Shape one <tag> child into a dict holding the fields in tag_fields."""
    child_dict = {}
    child_atts = child.attrib
    for field in tag_fields:
        if field == 'id':
            child_dict[field] = parent_atts[field]
        elif field == 'key':
            if ':' in child_atts['k']:
                # "type:key" is split at the first colon only
                child_dict['type'], child_dict[field] = \
                    child_atts['k'].split(':', 1)
            else:
                child_dict[field] = child_atts['k']
                child_dict['type'] = default_tag_type
        elif field == 'value':
            if audit.is_street_name(child):
                child_dict[field] = audit.update_name(
                    child_atts['v'], audit.mapping)
            elif audit.is_up_for_fixing(child):
                child_dict[field] = audit.fix(child_atts['v'])
            else:
                child_dict[field] = child_atts['v']
    return child_dict


def _shape_way_node(child, parent_atts, position):
    """Shape one <nd> child into a dict holding the WAY_NODES_FIELDS fields."""
    child_dict = {}
    for field in WAY_NODES_FIELDS:
        if field == 'id':
            child_dict[field] = parent_atts[field]
        elif field == 'node_id':
            child_dict[field] = child.attrib['ref']
        else:
            # any remaining field receives the child's position in the way
            child_dict[field] = position
    return child_dict


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  default_tag_type='regular'):
    """Clean and shape node or way XML element to Python dict.

    Args:
        element: the XML element to shape.
        node_attr_fields: attribute names copied from a node; a missing
            'user' becomes 'NO_USER' and a missing 'uid' becomes 0.
        way_attr_fields: attribute names copied from a way.
        problem_chars: accepted for interface compatibility; this
            implementation does not filter on it.
        default_tag_type: 'type' given to tags whose key has no colon.

    Returns:
        {'node': ..., 'node_tags': [...]} for a node,
        {'way': ..., 'way_nodes': [...], 'way_tags': [...]} for a way,
        None for any other element.
    """
    if element.tag == 'node':
        node_atts = element.attrib
        node_attribs = {}
        for field in node_attr_fields:
            if field not in node_atts and field == 'user':
                node_attribs[field] = 'NO_USER'
            elif field not in node_atts and field == 'uid':
                node_attribs[field] = 0
            else:
                node_attribs[field] = node_atts[field]

        # Children are shaped once, outside the attribute loop, so each tag
        # yields exactly one row
        tags = [
            _shape_child_tag(child, node_atts, NODE_TAGS_FIELDS,
                             default_tag_type)
            for child in element
        ]
        return {'node': node_attribs, 'node_tags': tags}

    if element.tag == 'way':
        way_atts = element.attrib
        way_attribs = {field: way_atts[field] for field in way_attr_fields}

        tags = []
        way_nodes = []
        for child in element:
            if child.tag == 'tag':
                tags.append(_shape_child_tag(child, way_atts,
                                             WAY_TAGS_FIELDS,
                                             default_tag_type))
            elif child.tag == 'nd':
                # position counts the <nd> children seen so far
                way_nodes.append(
                    _shape_way_node(child, way_atts, len(way_nodes)))
        return {'way': way_attribs, 'way_nodes': way_nodes, 'way_tags': tags}

    return None
Example #17
0
def get_tags(element, unique_id, problem_chars, default_tag_type):
    """
    Build the list of tag dicts for every <tag> under element.

    Each dict gets:
      * "id": unique_id (the top level node/way id);
      * "key" / "type": for keys free of problem characters, the full "k"
        with default_tag_type when there is no ":", otherwise the text after
        and before the first ":";
      * "value": the "v" attribute, set to None for postcodes that are not
        five characters starting with "08", and rewritten for street names.

    Street types that match none of LANG_MAPPING, EXPECTED or MAPPING are
    added (case-fixed) to uncaught_st and the value is left as it was.
    """
    shaped = []
    for tag in element.iter("tag"):
        entry = {"id": unique_id}

        raw_key = tag.attrib["k"]
        if problem_chars.search(raw_key):
            # NOTE(review): such tags are still emitted, with a "type" but
            # no "key" entry - confirm this is what consumers expect
            entry["type"] = default_tag_type
        else:
            before, colon, after = raw_key.partition(":")
            if colon:
                entry["key"] = after
                entry["type"] = before
            else:
                entry["key"] = raw_key
                entry["type"] = default_tag_type

        value = tag.attrib["v"]

        # Postcodes must be five characters long and start with "08"
        if audit.is_postcode(tag):
            if len(value) != 5 or not value.startswith("08"):
                value = None

        if audit.is_street_name(tag):
            st_type = get_street_type(value)
            st_name = value[len(st_type) + 1:]
            lowered = st_type.lower()

            if lowered in LANG_MAPPING:
                value = fix_lang(st_type, st_name)
            elif lowered in EXPECTED:
                value = fix_case(st_type) + " " + fix_case(st_name)
            elif lowered in MAPPING:
                value = MAPPING[lowered] + " " + fix_case(st_name)
            else:
                uncaught_st.add(fix_case(st_type))

        entry["value"] = value
        shaped.append(entry)
    return shaped
Example #18
0
def shape_element(element):
    """
    Shape a "node" or "way" element into a dict; return None otherwise.

    Attributes named in CREATED go into a 'created' sub-dict (the timestamp
    passed through str), attributes named in COORDINATES fill 'pos' as
    [lat, lon] floats, and all other attributes are copied as they are.
    Child tags whose key is free of problem characters are then merged in,
    and the refs of <nd> children are collected under 'node_refs'.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {'type': element.tag}

    for attribute, raw in element.attrib.items():
        if attribute in CREATED:
            created = node.setdefault('created', {})
            # timestamp is not json serializeable as it stands
            created[attribute] = str(raw) if attribute == "timestamp" else raw
        elif attribute in COORDINATES:
            pos = node.setdefault('pos', [None, None])
            if attribute == 'lat':
                pos[0] = float(raw)
            if attribute == 'lon':
                pos[1] = float(raw)
        else:
            node[attribute] = raw

    for tag in element.iter("tag"):
        k = tag.attrib['k']
        if problemchars.search(k):
            continue

        starts_with_addr = k.startswith('addr')

        if lower_colon.search(k) and starts_with_addr:
            # Single-colon address tag: stored under the part after the colon
            address = node.setdefault('address', {})
            sub_key = k.split(':', 1)[1]

            if audit.is_street_name(tag):
                better_name = audit.update_name(tag.attrib['v'],
                                                audit.mapping_road)
                address[sub_key] = audit.update_direction(
                    better_name, audit.mapping_directions)
            else:
                address[sub_key] = tag.attrib['v']
        elif not starts_with_addr:
            # Non-address tag: never overwrites a key already on the node
            if k not in node:
                node[k] = tag.attrib['v']
        else:
            # Remaining "addr..." keys are kept under a "tag:" prefix
            node["tag:" + k] = tag.attrib['v']

    # node_refs is only created when the element has at least one <nd>
    for nd in element.iter("nd"):
        node.setdefault('node_refs', []).append(nd.attrib['ref'])
    return node
Example #19
0
def _shape_attribs(element, fields):
    """Copy the listed top-level attributes, using the string "None" for missing ones."""
    return {field: element.attrib.get(field, "None") for field in fields}


def _shape_tags(element, problem_chars, tag_attr_fields, default_tag_type):
    """Build one dict per direct <tag> child of a node or way element."""
    shaped = []
    for child in element.findall('tag'):
        key = child.attrib['k']
        if problem_chars.search(key):
            # Skip only the offending tag; the remaining tags are still shaped.
            continue

        tag_attribs = dict.fromkeys(tag_attr_fields)
        tag_attribs['id'] = element.attrib['id']  # id of the parent element

        # Split on the first colon only (if there is one).
        # NOTE(review): the text before the colon is stored as 'key' and the
        # remainder as 'type' -- confirm this matches the consumer's schema.
        prefix, colon, remainder = key.partition(':')
        if colon:
            tag_attribs['key'] = prefix
            tag_attribs['type'] = remainder
        else:
            tag_attribs['key'] = key
            tag_attribs['type'] = default_tag_type

        # Clean the values that have a dedicated update function.
        if is_street_name(child):
            tag_attribs['value'] = update_street_name(child.attrib['v'])
        elif is_postcode(child):
            tag_attribs['value'] = update_postcode(child.attrib['v'])
        else:
            tag_attribs['value'] = child.attrib['v']

        shaped.append(tag_attribs)
    return shaped


def shape_element(element,
                  node_attr_fields=NODE_FIELDS,
                  way_attr_fields=WAY_FIELDS,
                  problem_chars=PROBLEMCHARS,
                  tag_attr_fields=TAGS_FIELDS,
                  way_node_attr_fields=WAY_NODES_FIELDS,
                  default_tag_type='regular'):
    """Clean and shape a node or way XML element into Python dicts.

    Args:
        element: XML element whose ``tag`` is inspected ('node' or 'way').
        node_attr_fields: attribute names copied from a node element.
        way_attr_fields: attribute names copied from a way element.
        problem_chars: compiled pattern; a tag whose 'k' attribute matches
            it is skipped.
        tag_attr_fields: field names used to initialise each tag dict.
        way_node_attr_fields: field names used to initialise each way-node
            dict.
        default_tag_type: 'type' given to tags whose key has no colon.

    Returns:
        For a node: ``{'node': ..., 'node_tags': [...]}``.
        For a way: ``{'way': ..., 'way_nodes': [...], 'way_tags': [...]}``.
        Attributes absent from the element are stored as the string "None".

    Raises:
        KeyError: if a <tag> child lacks 'k' or 'v', an <nd> child lacks
            'ref', or the element has such children but no 'id'.
    """
    if element.tag == 'node':
        return {
            'node': _shape_attribs(element, node_attr_fields),
            'node_tags': _shape_tags(element, problem_chars, tag_attr_fields,
                                     default_tag_type),
        }

    elif element.tag == 'way':
        way_nodes = []
        # Position is the index among the <nd> children only, so interleaved
        # <tag> children cannot shift it.
        for position, nd in enumerate(element.findall('nd')):
            way_node_attribs = dict.fromkeys(way_node_attr_fields)
            way_node_attribs['id'] = element.attrib['id']
            way_node_attribs['node_id'] = nd.attrib['ref']
            way_node_attribs['position'] = position
            way_nodes.append(way_node_attribs)

        return {
            'way': _shape_attribs(element, way_attr_fields),
            'way_nodes': way_nodes,
            'way_tags': _shape_tags(element, problem_chars, tag_attr_fields,
                                    default_tag_type),
        }