コード例 #1
0
ファイル: auditaddr.py プロジェクト: tommak/PrgOpenstreetmap
def audit_strnames(filename, logfile=None):
    """Audit street names, check validity and mistypes.

    Every 'node' and 'way' element of ``filename`` is read through
    ``hlp.get_element``. Each non-empty street name is checked with
    ``chk_valid_strname`` and every message it reports is recorded with
    ``add_stat``. Afterwards all distinct (street, postcode) pairs are
    compared with each other using ``is_equalmstp`` and ``is_equallw``.

    A logger is created only when ``logfile`` is truthy.
    Returns the ``defaultdict(int)`` handed to every ``add_stat`` call.
    """
    logger = None
    if logfile:
        logger = hlp.get_logger(logfile)

    stats = defaultdict(int)
    seen = set()

    for element in hlp.get_element(filename, tags=('node', 'way')):
        elem_id = element.attrib["id"]
        street, postcode = hlp.get_tags_data(
            element,
            [('addr:street', 'street'), ('addr:postcode', 'postcode')])
        if not street:
            continue

        ok, problems = chk_valid_strname(street)
        if not ok:
            for problem in problems:
                add_stat(logger, stats, problem, element.tag, elem_id, street)
        seen.add((street, postcode))

    # Compare every unordered pair of distinct (street, postcode) tuples once.
    candidates = list(seen)
    for pos, first in enumerate(candidates):
        for second in candidates[pos + 1:]:
            mistyped = (is_equalmstp(first[0], second[0])
                        and first[1] == second[1])
            if mistyped:
                detail = u'({},{}), pcode = {}'.format(
                    first[0], second[0], first[1])
                add_stat(logger, stats, "Mistyped street names",
                         None, None, detail)
            if is_equallw(first[0], second[0]):
                # Counted only: None is passed instead of the logger so
                # these cases are not logged.
                add_stat(None, stats, "Capital Lower letters Difference",
                         None, None, u'({},{})'.format(first, second))

    return stats
コード例 #2
0
def audit_addrnum(filename, logfile=None):
    """Audit address house numbers, check validity, completeness and consistency.

    For each 'node'/'way' element of ``filename`` the address numbers are
    fetched with ``get_addrnum``. Elements without any address number are
    skipped. ``chk_valid`` runs first; when it fails its messages are
    recorded and the remaining checks are skipped for that element.
    Otherwise ``chk_complete`` and ``chk_consist`` run in that order.

    A logger is created only when ``logfile`` is truthy.
    Returns the ``defaultdict(int)`` handed to every ``add_stat`` call.
    """
    logger = None
    if logfile:
        logger = hlp.get_logger(logfile)
    stats = defaultdict(int)

    def record(element, elem_id, addr, messages):
        # Forward each message of one failed check to add_stat.
        for message in messages:
            add_stat(logger, stats, message, element.tag, elem_id, addr)

    for element in hlp.get_element(filename, tags=('node', 'way')):
        elem_id = element.attrib["id"]
        addr = get_addrnum(element)

        # Nothing to audit when no address number is given at all.
        if not any(addr):
            continue

        passed, messages = chk_valid(addr)
        if not passed:
            record(element, elem_id, addr, messages)
            continue

        for check in (chk_complete, chk_consist):
            passed, messages = check(addr)
            if not passed:
                record(element, elem_id, addr, messages)

    return stats
コード例 #3
0
def test_chk_valid():
    """Check ``ad.chk_valid`` against selected elements of the fixture file.

    The indexes below refer to the order in which ``hlp.get_element``
    yields the 'node' and 'way' elements of the fixture.
    """
    import os

    # os.path.join keeps the fixture reachable on POSIX systems too; a
    # literal backslash is only a path separator on Windows.
    test_filename = os.path.join('test_data', 'test_chk_valid.osm')

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        addr = ad.get_addrnum(element)
        valid, msges = ad.chk_valid(addr)
        results.append((valid, msges))

    # (element index, expected validity flag, expected messages)
    expected = [
        (0, True, []),
        (11, False, [
            "VALIDITY:Both cnsnumber and prvnumber are given",
            "VALIDITY: Not valid type of housenumber"
        ]),
        (12, False, [
            "VALIDITY: Not valid type of streetnumber",
            "VALIDITY: Not valid type of conscriptionnumber or provisionalnumber"
        ]),
        (9, True, []),
        (10, True, []),
    ]
    for index, exp_valid, exp_msges in expected:
        assert_equal(results[index][0], exp_valid)
        assert_equal(results[index][1], exp_msges)
コード例 #4
0
def audit_strnames(filename, logfile=None):
    """Audit street names, check validity and mistypes.

    Pass 1: for every 'node'/'way' element with a street name, record the
    messages reported by ``chk_valid_strname`` and remember the
    (street, postcode) pair.
    Pass 2: compare each pair of remembered entries once, counting the
    pairs flagged by ``is_equalmstp`` (same postcode required) and by
    ``is_equallw``.

    Returns the ``defaultdict(int)`` handed to every ``add_stat`` call.
    """
    stats = defaultdict(int)
    unique_pairs = set()
    logger = hlp.get_logger(logfile) if logfile else None

    wanted_tags = [('addr:street', 'street'), ('addr:postcode', 'postcode')]
    for element in hlp.get_element(filename, tags=('node', 'way')):
        element_id = element.attrib["id"]
        strname, pcode = hlp.get_tags_data(element, wanted_tags)
        if strname:
            is_valid, messages = chk_valid_strname(strname)
            if not is_valid:
                for message in messages:
                    add_stat(logger, stats, message, element.tag,
                             element_id, strname)
            unique_pairs.add((strname, pcode))

    pairs = list(unique_pairs)
    for idx, left in enumerate(pairs):
        left_name, left_pcode = left
        for right in pairs[idx + 1:]:
            right_name, right_pcode = right

            if is_equalmstp(left_name, right_name) and (left_pcode == right_pcode):
                add_stat(
                    logger, stats, "Mistyped street names", None, None,
                    u'({},{}), pcode = {}'.format(left_name, right_name,
                                                  left_pcode))

            if is_equallw(left_name, right_name):
                # Do not log these cases: add_stat gets None as its logger.
                add_stat(None, stats, "Capital Lower letters Difference",
                         None, None, u'({},{})'.format(left, right))

    return stats
コード例 #5
0
ファイル: auditaddr.py プロジェクト: tommak/PrgOpenstreetmap
def audit_addrnum(filename, logfile=None):
    """Audit address house numbers, check validity, completeness and consistency.

    Iterates the 'node' and 'way' elements of ``filename``. An element
    whose address numbers are all falsy is ignored. A failed ``chk_valid``
    is reported and ends the audit of that element. Otherwise the
    messages of ``chk_complete`` and ``chk_consist`` are reported.

    Returns the ``defaultdict(int)`` handed to every ``add_stat`` call.
    """
    counters = defaultdict(int)

    if logfile:
        logger = hlp.get_logger(logfile)
    else:
        logger = None

    followup_checks = [chk_complete, chk_consist]

    for element in hlp.get_element(filename, tags=('node', 'way')):
        element_id = element.attrib["id"]
        numbers = get_addrnum(element)

        # Skip elements that carry no address number at all.
        if not any(numbers):
            continue

        is_valid, messages = chk_valid(numbers)
        if not is_valid:
            # Invalid numbers are reported and not checked any further.
            for message in messages:
                add_stat(logger, counters, message, element.tag,
                         element_id, numbers)
            continue

        for check in followup_checks:
            is_valid, messages = check(numbers)
            if is_valid:
                continue
            for message in messages:
                add_stat(logger, counters, message, element.tag,
                         element_id, numbers)

    return counters
コード例 #6
0
ファイル: audit.py プロジェクト: tommak/PrgOpenstreetmap
    def audit(self, filename):
        """Run all auditors over every element of ``filename``.

        Each auditor in ``self._auditors`` is paired by position with a
        name from ``self._names``. For every element the auditor is
        called with the element and its own previous result; the value it
        returns becomes the new result stored under that name.

        Returns a tuple ``(audit_result, timing)``: the per-name results
        (each starting as a ``defaultdict(int)``) and the per-name sum of
        the ``time()`` differences measured around each auditor call.
        """
        timing = {}
        audit_result = {}
        for label in self._names:
            timing[label] = 0
        for label in self._names:
            audit_result[label] = defaultdict(int)

        for element in hlp.get_element(filename):
            for position, auditor in enumerate(self._auditors):
                label = self._names[position]
                started = time()
                audit_result[label] = auditor(element, audit_result[label])
                timing[label] += time() - started

        return audit_result, timing
コード例 #7
0
def test_get_addrnum():
    """Check the tuples returned by ``ad.get_addrnum`` for selected elements.

    The indexes refer to the order in which ``hlp.get_element`` yields
    the 'node' and 'way' elements of the fixture file.
    """
    import os

    # Portable path: a hard-coded backslash only works as a separator on
    # Windows.
    test_filename = os.path.join('test_data', 'test_data.osm')

    addr_list = [
        ad.get_addrnum(element)
        for element in hlp.get_element(test_filename, tags=('node', 'way'))
    ]

    AddrNum = namedtuple('AddrNum', 'hsnumber, cnsnumber, prvnumber, streetnumber')
    assert_equal(addr_list[0], AddrNum(None, None, None, None))
    assert_equal(addr_list[9], AddrNum('15', None, None, None))
    assert_equal(addr_list[10], AddrNum('1265/21', '1265', None, '21'))
コード例 #8
0
    def audit(self, filename):
        """Feed every element of ``filename`` to each registered auditor.

        The auditor at position ``i`` of ``self._auditors`` works on the
        result stored under ``self._names[i]``: it receives the element
        and the previous result and returns the updated one.

        Returns ``(audit_result, timing)``, two dicts keyed by auditor
        name. ``timing`` holds the accumulated ``time()`` differences
        measured around the calls of each auditor.
        """
        names = self._names
        timing = dict.fromkeys(names, 0)
        audit_result = dict((name, defaultdict(int)) for name in names)

        for element in hlp.get_element(filename):
            for index, run_auditor in enumerate(self._auditors):
                key = self._names[index]
                t0 = time()
                previous = audit_result[key]
                audit_result[key] = run_auditor(element, previous)
                elapsed = time() - t0
                timing[key] = timing[key] + elapsed

        return audit_result, timing
コード例 #9
0
def test_get_addrnum():
    """Verify ``ad.get_addrnum`` on three elements of the fixture file.

    Elements are addressed by their position in the sequence produced by
    ``hlp.get_element`` for the 'node' and 'way' tags.
    """
    import os

    # Join the path components instead of embedding a Windows-only
    # backslash so the fixture is also found on POSIX systems.
    test_filename = os.path.join('test_data', 'test_data.osm')

    addr_list = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        addr_list.append(ad.get_addrnum(element))

    AddrNum = namedtuple('AddrNum',
                         'hsnumber, cnsnumber, prvnumber, streetnumber')
    expected = [
        (0, AddrNum(None, None, None, None)),
        (9, AddrNum('15', None, None, None)),
        (10, AddrNum('1265/21', '1265', None, '21')),
    ]
    for index, addr in expected:
        assert_equal(addr_list[index], addr)
コード例 #10
0
def audit_postcodes(filename, logfile=None):
    """Audit postcodes, check validity.

    Reads the postcode of every 'node'/'way' element of ``filename`` and
    passes each non-empty one to ``chk_valid_postcode``. Every message
    of a failed check is recorded with ``add_stat``.

    A logger is created only when ``logfile`` is truthy.
    Returns the ``defaultdict(int)`` handed to every ``add_stat`` call.
    """
    logger = None
    if logfile:
        logger = hlp.get_logger(logfile)
    stats = defaultdict(int)

    for element in hlp.get_element(filename, tags=('node', 'way')):
        elem_id = element.attrib["id"]
        # get_tags_data is asked for one tag, so exactly one value is unpacked.
        (pcode,) = hlp.get_tags_data(element, [('addr:postcode', 'postcode')])
        if not pcode:
            continue

        ok, problems = chk_valid_postcode(pcode)
        if ok:
            continue
        for problem in problems:
            add_stat(logger, stats, problem, element.tag, elem_id, pcode)

    return stats
コード例 #11
0
ファイル: auditaddr.py プロジェクト: tommak/PrgOpenstreetmap
def audit_postcodes(filename, logfile=None):
    """Audit postcodes, check validity.

    For each 'node' and 'way' element of ``filename`` that has a
    postcode, run ``chk_valid_postcode`` and hand every resulting message
    of a failed check to ``add_stat``.

    Returns the ``defaultdict(int)`` handed to every ``add_stat`` call.
    """
    counters = defaultdict(int)

    if logfile:
        logger = hlp.get_logger(logfile)
    else:
        logger = None

    postcode_tag = [('addr:postcode', 'postcode')]
    for element in hlp.get_element(filename, tags=('node', 'way')):
        element_id = element.attrib["id"]
        [postcode] = hlp.get_tags_data(element, postcode_tag)
        if postcode:
            is_valid, messages = chk_valid_postcode(postcode)
            if not is_valid:
                for message in messages:
                    add_stat(logger, counters, message, element.tag,
                             element_id, postcode)

    return counters
コード例 #12
0
def process_map(file_in, validate, strfix_dict, logfile=None):
    """Iteratively process each XML element and write to csv(s)"""

    fixer = fxad.FixAddress(strfix_dict, logfile)


    with codecs.open(NODES_PATH, 'w') as nodes_file, \
         codecs.open(NODE_TAGS_PATH, 'w') as nodes_tags_file, \
         codecs.open(WAYS_PATH, 'w') as ways_file, \
         codecs.open(WAY_NODES_PATH, 'w') as way_nodes_file, \
         codecs.open(WAY_TAGS_PATH, 'w') as way_tags_file:

        nodes_writer = hlp.UnicodeDictWriter(nodes_file, NODE_FIELDS)
        node_tags_writer = hlp.UnicodeDictWriter(nodes_tags_file,
                                                 NODE_TAGS_FIELDS)
        ways_writer = hlp.UnicodeDictWriter(ways_file, WAY_FIELDS)
        way_nodes_writer = hlp.UnicodeDictWriter(way_nodes_file,
                                                 WAY_NODES_FIELDS)
        way_tags_writer = hlp.UnicodeDictWriter(way_tags_file, WAY_TAGS_FIELDS)

        validator = cerberus.Validator()

        for element in hlp.get_element(file_in, tags=('node', 'way')):
            try:
                el = shape_element(element, fixer=fixer)
            except Exception as er:
                print "Failed to shape the following element:\n", ET.tostring(
                    element, encoding='utf-8')
                print "Catched Exeption:\n", er
                continue

            if el:
                if validate is True:
                    if not valid_element(el, validator):
                        print "Excluding not valid element:\n", ET.tostring(
                            element, encoding='utf-8')
                        continue

                if element.tag == 'node':
                    nodes_writer.writerow(el['node'])
                    node_tags_writer.writerows(el['node_tags'])
                elif element.tag == 'way':
                    ways_writer.writerow(el['way'])
                    way_nodes_writer.writerows(el['way_nodes'])
                    way_tags_writer.writerows(el['way_tags'])
コード例 #13
0
def test_chk_consist():
    """Check ``ad.chk_consist`` against selected elements of the fixture file.

    The indexes below refer to the order in which ``hlp.get_element``
    yields the 'node' and 'way' elements of the fixture.
    """
    import os

    # os.path.join keeps the fixture reachable on POSIX systems too; a
    # literal backslash is only a path separator on Windows.
    test_filename = os.path.join('test_data', 'test_chk_consist.osm')

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        addr = ad.get_addrnum(element)
        valid, msges = ad.chk_consist(addr)
        results.append((valid, msges))

    # (element index, expected consistency flag, expected messages)
    expected = [
        (10, False, ["CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"]),
        (11, True, []),
        (12, False, ["CONSISTENCY: One-number hsnumber is not consistent with fstnumber or streetnumber"]),
        (13, False, ["CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"]),
    ]
    for index, exp_valid, exp_msges in expected:
        assert_equal(results[index][0], exp_valid)
        assert_equal(results[index][1], exp_msges)
コード例 #14
0
def test_chk_complete():
    """Check ``ad.chk_complete`` against selected elements of the fixture file.

    The indexes below refer to the order in which ``hlp.get_element``
    yields the 'node' and 'way' elements of the fixture.
    """
    import os

    # Portable path: a hard-coded backslash only works as a separator on
    # Windows.
    test_filename = os.path.join('test_data', 'test_chk_complete.osm')

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        addr = ad.get_addrnum(element)
        valid, msges = ad.chk_complete(addr)
        results.append((valid, msges))

    # (element index, expected completeness flag, expected messages)
    expected = [
        (9, False, ["COMPLETENESS: Missed hsnumber"]),
        (10, True, []),
        (11, False, ["COMPLETENESS: Missed fstnumber, streetnumber or both"]),
        (12, False, ["COMPLETENESS: Missed streetnumber or fstnumber in hsnumber"]),
    ]
    for index, exp_valid, exp_msges in expected:
        assert_equal(results[index][0], exp_valid)
        assert_equal(results[index][1], exp_msges)
コード例 #15
0
def test_chk_valid():
    """Verify ``ad.chk_valid`` on five elements of the fixture file.

    Elements are addressed by their position in the sequence produced by
    ``hlp.get_element`` for the 'node' and 'way' tags.
    """
    import os

    # Join the path components instead of embedding a Windows-only
    # backslash so the fixture is also found on POSIX systems.
    test_filename = os.path.join('test_data', 'test_chk_valid.osm')

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        valid, msges = ad.chk_valid(ad.get_addrnum(element))
        results.append((valid, msges))

    assert_equal(results[0][0], True)
    assert_equal(results[0][1], [])
    assert_equal(results[11][0], False)
    assert_equal(results[11][1], ["VALIDITY:Both cnsnumber and prvnumber are given", "VALIDITY: Not valid type of housenumber"])
    assert_equal(results[12][0], False)
    assert_equal(results[12][1], ["VALIDITY: Not valid type of streetnumber", "VALIDITY: Not valid type of conscriptionnumber or provisionalnumber"])
    assert_equal(results[9][0], True)
    assert_equal(results[9][1], [])
    assert_equal(results[10][0], True)
    assert_equal(results[10][1], [])
コード例 #16
0
ファイル: data.py プロジェクト: tommak/PrgOpenstreetmap
def process_map(file_in, validate, strfix_dict, logfile=None):
    """Iteratively process each XML element and write to csv(s)"""

    fixer = fxad.FixAddress(strfix_dict, logfile)


    with codecs.open(NODES_PATH, 'w') as nodes_file, \
         codecs.open(NODE_TAGS_PATH, 'w') as nodes_tags_file, \
         codecs.open(WAYS_PATH, 'w') as ways_file, \
         codecs.open(WAY_NODES_PATH, 'w') as way_nodes_file, \
         codecs.open(WAY_TAGS_PATH, 'w') as way_tags_file:

        nodes_writer = hlp.UnicodeDictWriter(nodes_file, NODE_FIELDS)
        node_tags_writer = hlp.UnicodeDictWriter(nodes_tags_file, NODE_TAGS_FIELDS)
        ways_writer = hlp.UnicodeDictWriter(ways_file, WAY_FIELDS)
        way_nodes_writer = hlp.UnicodeDictWriter(way_nodes_file, WAY_NODES_FIELDS)
        way_tags_writer = hlp.UnicodeDictWriter(way_tags_file, WAY_TAGS_FIELDS)

        validator = cerberus.Validator()

        for element in hlp.get_element(file_in, tags=('node', 'way')):
            try:
                el = shape_element(element, fixer=fixer)
            except Exception as er:
                print "Failed to shape the following element:\n", ET.tostring(element, encoding='utf-8')
                print "Catched Exeption:\n", er
                continue

            if el:
                if validate is True:
                    if not valid_element(el, validator):
                        print "Excluding not valid element:\n", ET.tostring(element, encoding='utf-8')
                        continue

                if element.tag == 'node':
                    nodes_writer.writerow(el['node'])
                    node_tags_writer.writerows(el['node_tags'])
                elif element.tag == 'way':
                    ways_writer.writerow(el['way'])
                    way_nodes_writer.writerows(el['way_nodes'])
                    way_tags_writer.writerows(el['way_tags'])
コード例 #17
0
def test_chk_complete():
    """Verify ``ad.chk_complete`` on four elements of the fixture file.

    Elements are addressed by their position in the sequence produced by
    ``hlp.get_element`` for the 'node' and 'way' tags.
    """
    import os

    # Join the path components instead of embedding a Windows-only
    # backslash so the fixture is also found on POSIX systems.
    test_filename = os.path.join('test_data', 'test_chk_complete.osm')

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        valid, msges = ad.chk_complete(ad.get_addrnum(element))
        results.append((valid, msges))

    assert_equal(results[9][0], False)
    assert_equal(results[9][1], ["COMPLETENESS: Missed hsnumber"])
    assert_equal(results[10][0], True)
    assert_equal(results[10][1], [])
    assert_equal(results[11][0], False)
    assert_equal(results[11][1],
                 ["COMPLETENESS: Missed fstnumber, streetnumber or both"])
    assert_equal(results[12][0], False)
    assert_equal(
        results[12][1],
        ["COMPLETENESS: Missed streetnumber or fstnumber in hsnumber"])
コード例 #18
0
def test_chk_consist():
    """Verify ``ad.chk_consist`` on four elements of the fixture file.

    Elements are addressed by their position in the sequence produced by
    ``hlp.get_element`` for the 'node' and 'way' tags.
    """
    import os

    # Portable path: a hard-coded backslash only works as a separator on
    # Windows.
    test_filename = os.path.join('test_data', 'test_chk_consist.osm')

    results = []
    for element in hlp.get_element(test_filename, tags=('node', 'way')):
        valid, msges = ad.chk_consist(ad.get_addrnum(element))
        results.append((valid, msges))

    assert_equal(results[10][0], False)
    assert_equal(results[10][1], [
        "CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"
    ])
    assert_equal(results[11][0], True)
    assert_equal(results[11][1], [])
    assert_equal(results[12][0], False)
    assert_equal(results[12][1], [
        "CONSISTENCY: One-number hsnumber is not consistent with fstnumber or streetnumber"
    ])
    assert_equal(results[13][0], False)
    assert_equal(results[13][1], [
        "CONSISTENCY: Composite hsnumber is not consistent with fstnumber and streetnumber"
    ])