def audit_strnames(filename, logfile=None):
    """
    Audit street names: validate each name, then compare names pairwise.

    Arguments:
        filename -- passed to hlp.get_element to iterate 'node'/'way' elements
        logfile -- when truthy, a logger is obtained via hlp.get_logger;
                   otherwise findings are recorded without a logger

    Returns the defaultdict(int) handed to add_stat for every finding.
    """
    logger = None
    if logfile:
        logger = hlp.get_logger(logfile)
    stats = defaultdict(int)

    # Pass 1: validate each street name and collect the distinct
    # (street name, postcode) combinations.
    seen = set()
    for element in hlp.get_element(filename, tags=('node', 'way')):
        elem_id = element.attrib["id"]
        strname, pcode = hlp.get_tags_data(
            element,
            [('addr:street', 'street'), ('addr:postcode', 'postcode')])
        if not strname:
            continue
        is_valid, messages = chk_valid_strname(strname)
        if not is_valid:
            for message in messages:
                add_stat(logger, stats, message, element.tag, elem_id, strname)
        seen.add((strname, pcode))

    # Pass 2: visit every unordered pair of collected entries exactly once.
    entries = list(seen)
    for pos, first in enumerate(entries):
        street_a, pcode_a = first
        for second in entries[pos + 1:]:
            street_b, pcode_b = second
            if is_equalmstp(street_a, street_b) and pcode_a == pcode_b:
                add_stat(
                    logger, stats, "Mistyped street names", None, None,
                    u'({},{}), pcode = {}'.format(street_a, street_b, pcode_a))
            if is_equallw(street_a, street_b):
                # Counted in stats only: no logger is passed for this case.
                add_stat(
                    None, stats, "Capital Lower letters Difference", None, None,
                    u'({},{})'.format(first, second))
    return stats
def audit_addrnum(filename, logfile=None):
    """
    Audit address house numbers for validity, completeness and consistency.

    Arguments:
        filename -- passed to hlp.get_element to iterate 'node'/'way' elements
        logfile -- when truthy, a logger is obtained via hlp.get_logger

    Completeness and consistency are only checked for addresses that pass
    the validity check.

    Returns the defaultdict(int) handed to add_stat for every finding.
    """
    logger = None
    if logfile:
        logger = hlp.get_logger(logfile)
    stats = defaultdict(int)

    for element in hlp.get_element(filename, tags=('node', 'way')):
        elem_id = element.attrib["id"]
        addr = get_addrnum(element)

        # Nothing to audit when no address number is given at all.
        if not any(addr):
            continue

        is_valid, messages = chk_valid(addr)
        if is_valid:
            for check in [chk_complete, chk_consist]:
                passed, messages = check(addr)
                if passed:
                    continue
                for message in messages:
                    add_stat(logger, stats, message, element.tag, elem_id, addr)
        else:
            # An invalid address is reported and the further checks skipped.
            for message in messages:
                add_stat(logger, stats, message, element.tag, elem_id, addr)
    return stats
def test_chk_valid():
    # Runs ad.chk_valid over every node/way of the fixture file and checks
    # the (valid, messages) outcome at selected positions.
    test_filename = 'test_data\\test_chk_valid.osm'

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        addr = ad.get_addrnum(element)
        is_valid, messages = ad.chk_valid(addr)
        results.append((is_valid, messages))

    # (position in results, expected validity, expected messages)
    expected = (
        (0, True, []),
        (11, False, [
            "VALIDITY:Both cnsnumber and prvnumber are given",
            "VALIDITY: Not valid type of housenumber",
        ]),
        (12, False, [
            "VALIDITY: Not valid type of streetnumber",
            "VALIDITY: Not valid type of conscriptionnumber or provisionalnumber",
        ]),
        (9, True, []),
        (10, True, []),
    )
    for index, want_valid, want_messages in expected:
        assert_equal(results[index][0], want_valid)
        assert_equal(results[index][1], want_messages)
def audit_strnames(filename, logfile=None):
    """
    Audit street names: validate each name, then compare names pairwise.

    Arguments:
        filename -- passed to hlp.get_element to iterate 'node'/'way' elements
        logfile -- when truthy, a logger is obtained via hlp.get_logger;
                   otherwise findings are recorded without a logger

    Returns the defaultdict(int) handed to add_stat for every finding.
    """
    logger = None
    if logfile:
        logger = hlp.get_logger(logfile)
    stats = defaultdict(int)

    # Pass 1: validate each street name and collect the distinct
    # (street name, postcode) combinations.
    collected = set()
    for element in hlp.get_element(filename, tags=('node', 'way')):
        elem_id = element.attrib["id"]
        strname, pcode = hlp.get_tags_data(
            element,
            [('addr:street', 'street'), ('addr:postcode', 'postcode')])
        if not strname:
            continue
        name_ok, problems = chk_valid_strname(strname)
        if not name_ok:
            for problem in problems:
                add_stat(logger, stats, problem, element.tag, elem_id, strname)
        collected.add((strname, pcode))

    # Pass 2: visit every unordered pair of collected entries exactly once.
    candidates = list(collected)
    for offset, left in enumerate(candidates):
        left_street, left_pcode = left
        for right in candidates[offset + 1:]:
            right_street, right_pcode = right
            if is_equalmstp(left_street, right_street) and left_pcode == right_pcode:
                add_stat(
                    logger, stats, "Mistyped street names", None, None,
                    u'({},{}), pcode = {}'.format(
                        left_street, right_street, left_pcode))
            if is_equallw(left_street, right_street):
                # Counted in stats only: no logger is passed for this case.
                add_stat(
                    None, stats, "Capital Lower letters Difference", None, None,
                    u'({},{})'.format(left, right))
    return stats
def audit(self, filename):
    """
    Feed every element of the file to each auditor in self._auditors.

    The auditor at position i is associated with self._names[i]. It is
    called as auditor(element, previous_result) and its return value
    replaces the stored result for that name; the initial result is an
    empty defaultdict(int).

    Returns a tuple (audit_result, timing): two dicts keyed by auditor
    name, holding the latest result and the summed time() deltas spent
    inside that auditor.
    """
    timing = dict.fromkeys(self._names, 0)
    audit_result = dict((name, defaultdict(int)) for name in self._names)

    for element in hlp.get_element(filename):
        for position, auditor in enumerate(self._auditors):
            name = self._names[position]
            started = time()
            audit_result[name] = auditor(element, audit_result[name])
            timing[name] += time() - started

    return audit_result, timing
def test_get_addrnum():
    # Collects ad.get_addrnum for every node/way of the fixture file and
    # compares selected positions against the expected AddrNum tuples.
    test_filename = 'test_data\\test_data.osm'
    AddrNum = namedtuple('AddrNum', 'hsnumber, cnsnumber, prvnumber, streetnumber')

    addr_list = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        addr_list.append(ad.get_addrnum(element))

    # (position in addr_list, expected address numbers)
    expected = (
        (0, AddrNum(None, None, None, None)),
        (9, AddrNum('15', None, None, None)),
        (10, AddrNum('1265/21', '1265', None, '21')),
    )
    for index, want in expected:
        assert_equal(addr_list[index], want)
def audit(self, filename):
    """
    Feed every element of the file to each auditor in self._auditors.

    The auditor at position i is associated with self._names[i]. It is
    called as auditor(element, previous_result) and its return value
    replaces the stored result for that name; the initial result is an
    empty defaultdict(int).

    Returns a tuple (audit_result, timing): two dicts keyed by auditor
    name, holding the latest result and the summed time() deltas spent
    inside that auditor.
    """
    timing = dict.fromkeys(self._names, 0)
    audit_result = dict((label, defaultdict(int)) for label in self._names)

    for element in hlp.get_element(filename):
        for slot, run_auditor in enumerate(self._auditors):
            label = self._names[slot]
            began = time()
            audit_result[label] = run_auditor(element, audit_result[label])
            timing[label] += time() - began

    return audit_result, timing
def test_get_addrnum():
    # Collects ad.get_addrnum for every node/way of the fixture file and
    # compares selected positions against the expected AddrNum tuples.
    test_filename = 'test_data\\test_data.osm'
    AddrNum = namedtuple('AddrNum', 'hsnumber, cnsnumber, prvnumber, streetnumber')

    addr_list = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        parsed = ad.get_addrnum(element)
        addr_list.append(parsed)

    # (position in addr_list, expected address numbers)
    checks = [
        (0, AddrNum(None, None, None, None)),
        (9, AddrNum('15', None, None, None)),
        (10, AddrNum('1265/21', '1265', None, '21')),
    ]
    for position, wanted in checks:
        assert_equal(addr_list[position], wanted)
def audit_postcodes(filename, logfile=None):
    """
    Audit postcodes by running chk_valid_postcode on each one found.

    Arguments:
        filename -- passed to hlp.get_element to iterate 'node'/'way' elements
        logfile -- when truthy, a logger is obtained via hlp.get_logger

    Returns the defaultdict(int) handed to add_stat for every finding.
    """
    logger = None
    if logfile:
        logger = hlp.get_logger(logfile)
    stats = defaultdict(int)

    for element in hlp.get_element(filename, tags=('node', 'way')):
        elem_id = element.attrib["id"]
        # Exactly one tag is requested, so exactly one value is unpacked.
        [pcode] = hlp.get_tags_data(element, [('addr:postcode', 'postcode')])
        if not pcode:
            continue
        is_valid, messages = chk_valid_postcode(pcode)
        if is_valid:
            continue
        for message in messages:
            add_stat(logger, stats, message, element.tag, elem_id, pcode)
    return stats
def process_map(file_in, validate, strfix_dict, logfile=None): """Iteratively process each XML element and write to csv(s)""" fixer = fxad.FixAddress(strfix_dict, logfile) with codecs.open(NODES_PATH, 'w') as nodes_file, \ codecs.open(NODE_TAGS_PATH, 'w') as nodes_tags_file, \ codecs.open(WAYS_PATH, 'w') as ways_file, \ codecs.open(WAY_NODES_PATH, 'w') as way_nodes_file, \ codecs.open(WAY_TAGS_PATH, 'w') as way_tags_file: nodes_writer = hlp.UnicodeDictWriter(nodes_file, NODE_FIELDS) node_tags_writer = hlp.UnicodeDictWriter(nodes_tags_file, NODE_TAGS_FIELDS) ways_writer = hlp.UnicodeDictWriter(ways_file, WAY_FIELDS) way_nodes_writer = hlp.UnicodeDictWriter(way_nodes_file, WAY_NODES_FIELDS) way_tags_writer = hlp.UnicodeDictWriter(way_tags_file, WAY_TAGS_FIELDS) validator = cerberus.Validator() for element in hlp.get_element(file_in, tags=('node', 'way')): try: el = shape_element(element, fixer=fixer) except Exception as er: print "Failed to shape the following element:\n", ET.tostring( element, encoding='utf-8') print "Catched Exeption:\n", er continue if el: if validate is True: if not valid_element(el, validator): print "Excluding not valid element:\n", ET.tostring( element, encoding='utf-8') continue if element.tag == 'node': nodes_writer.writerow(el['node']) node_tags_writer.writerows(el['node_tags']) elif element.tag == 'way': ways_writer.writerow(el['way']) way_nodes_writer.writerows(el['way_nodes']) way_tags_writer.writerows(el['way_tags'])
def test_chk_consist():
    # Runs ad.chk_consist over every node/way of the fixture file and checks
    # the (valid, messages) outcome at selected positions.
    test_filename = 'test_data\\test_chk_consist.osm'

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        addr = ad.get_addrnum(element)
        is_valid, messages = ad.chk_consist(addr)
        results.append((is_valid, messages))

    # (position in results, expected validity, expected messages)
    expected = (
        (10, False, ["CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"]),
        (11, True, []),
        (12, False, ["CONSISTENCY: One-number hsnumber is not consistent with fstnumber or streetnumber"]),
        (13, False, ["CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"]),
    )
    for index, want_valid, want_messages in expected:
        assert_equal(results[index][0], want_valid)
        assert_equal(results[index][1], want_messages)
def test_chk_complete():
    # Runs ad.chk_complete over every node/way of the fixture file and checks
    # the (valid, messages) outcome at selected positions.
    test_filename = 'test_data\\test_chk_complete.osm'

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        addr = ad.get_addrnum(element)
        is_valid, messages = ad.chk_complete(addr)
        results.append((is_valid, messages))

    # (position in results, expected validity, expected messages)
    expected = (
        (9, False, ["COMPLETENESS: Missed hsnumber"]),
        (10, True, []),
        (11, False, ["COMPLETENESS: Missed fstnumber, streetnumber or both"]),
        (12, False, ["COMPLETENESS: Missed streetnumber or fstnumber in hsnumber"]),
    )
    for index, want_valid, want_messages in expected:
        assert_equal(results[index][0], want_valid)
        assert_equal(results[index][1], want_messages)
def test_chk_valid():
    # Runs ad.chk_valid over every node/way of the fixture file and checks
    # the (valid, messages) outcome at selected positions.
    test_filename = 'test_data\\test_chk_valid.osm'

    outcomes = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        addr = ad.get_addrnum(element)
        verdict, notes = ad.chk_valid(addr)
        outcomes.append((verdict, notes))

    # (position in outcomes, expected validity, expected messages)
    cases = [
        (0, True, []),
        (11, False, [
            "VALIDITY:Both cnsnumber and prvnumber are given",
            "VALIDITY: Not valid type of housenumber",
        ]),
        (12, False, [
            "VALIDITY: Not valid type of streetnumber",
            "VALIDITY: Not valid type of conscriptionnumber or provisionalnumber",
        ]),
        (9, True, []),
        (10, True, []),
    ]
    for position, want_verdict, want_notes in cases:
        assert_equal(outcomes[position][0], want_verdict)
        assert_equal(outcomes[position][1], want_notes)
def process_map(file_in, validate, strfix_dict, logfile=None): """Iteratively process each XML element and write to csv(s)""" fixer = fxad.FixAddress(strfix_dict, logfile) with codecs.open(NODES_PATH, 'w') as nodes_file, \ codecs.open(NODE_TAGS_PATH, 'w') as nodes_tags_file, \ codecs.open(WAYS_PATH, 'w') as ways_file, \ codecs.open(WAY_NODES_PATH, 'w') as way_nodes_file, \ codecs.open(WAY_TAGS_PATH, 'w') as way_tags_file: nodes_writer = hlp.UnicodeDictWriter(nodes_file, NODE_FIELDS) node_tags_writer = hlp.UnicodeDictWriter(nodes_tags_file, NODE_TAGS_FIELDS) ways_writer = hlp.UnicodeDictWriter(ways_file, WAY_FIELDS) way_nodes_writer = hlp.UnicodeDictWriter(way_nodes_file, WAY_NODES_FIELDS) way_tags_writer = hlp.UnicodeDictWriter(way_tags_file, WAY_TAGS_FIELDS) validator = cerberus.Validator() for element in hlp.get_element(file_in, tags=('node', 'way')): try: el = shape_element(element, fixer=fixer) except Exception as er: print "Failed to shape the following element:\n", ET.tostring(element, encoding='utf-8') print "Catched Exeption:\n", er continue if el: if validate is True: if not valid_element(el, validator): print "Excluding not valid element:\n", ET.tostring(element, encoding='utf-8') continue if element.tag == 'node': nodes_writer.writerow(el['node']) node_tags_writer.writerows(el['node_tags']) elif element.tag == 'way': ways_writer.writerow(el['way']) way_nodes_writer.writerows(el['way_nodes']) way_tags_writer.writerows(el['way_tags'])
def test_chk_complete():
    # Runs ad.chk_complete over every node/way of the fixture file and checks
    # the (valid, messages) outcome at selected positions.
    test_filename = 'test_data\\test_chk_complete.osm'

    outcomes = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        addr = ad.get_addrnum(element)
        verdict, notes = ad.chk_complete(addr)
        outcomes.append((verdict, notes))

    # (position in outcomes, expected validity, expected messages)
    cases = [
        (9, False, ["COMPLETENESS: Missed hsnumber"]),
        (10, True, []),
        (11, False, ["COMPLETENESS: Missed fstnumber, streetnumber or both"]),
        (12, False, ["COMPLETENESS: Missed streetnumber or fstnumber in hsnumber"]),
    ]
    for position, want_verdict, want_notes in cases:
        assert_equal(outcomes[position][0], want_verdict)
        assert_equal(outcomes[position][1], want_notes)
def test_chk_consist():
    # Runs ad.chk_consist over every node/way of the fixture file and checks
    # the (valid, messages) outcome at selected positions.
    test_filename = 'test_data\\test_chk_consist.osm'

    outcomes = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        # Value is unused; the lookup is kept so an element without an
        # "id" attribute still fails here.
        _elem_id = element.attrib["id"]
        addr = ad.get_addrnum(element)
        verdict, notes = ad.chk_consist(addr)
        outcomes.append((verdict, notes))

    # (position in outcomes, expected validity, expected messages)
    cases = [
        (10, False, ["CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"]),
        (11, True, []),
        (12, False, ["CONSISTENCY: One-number hsnumber is not consistent with fstnumber or streetnumber"]),
        (13, False, ["CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"]),
    ]
    for position, want_verdict, want_notes in cases:
        assert_equal(outcomes[position][0], want_verdict)
        assert_equal(outcomes[position][1], want_notes)