コード例 #1
0
def audit_fixme(db, collection):
    """Fold all FIXME-style keys into a single 'fixme' key, then flag documents.

    Every spelling variant of the fixme tag is renamed to 'fixme' in all
    documents that carry it.  Afterwards ``db_ops.move_db`` is called with
    the 'NeedsFix' target for the 'fixme' field (presumably moving the
    flagged documents there -- confirm against db_ops).

    Args:
        db: database handle indexable by collection name.
        collection: name of the collection to audit.
    """
    target = 'fixme'
    # Variants that must be merged into the canonical key.
    variants = ['FIXME', 'FIXME2', 'fixme:de', 'note:FIXME', 'source:fixme']
    for key in variants:
        # Without multi=True only a single document would be updated; the
        # $exists filter restricts the update to documents that have the key.
        db[collection].update(
            {key: {'$exists': True}},
            {'$rename': {key: target}},
            multi=True)
    db_ops.move_db(db, collection, 'NeedsFix', 'fixme', None)
コード例 #2
0
def audit_postcode(db, collection):
    """Cross-check postcodes with the Google geocoder and fill in states.

    For every value ``db_ops.get_values`` returns for 'address.postcode' the
    geocoding endpoint is queried (up to three attempts while it answers
    OVER_QUERY_LIMIT).  If the first result names one of the expected
    states, documents with that postcode that lack 'address.state' get it
    set, and postcodes outside Hamburg are passed to ``db_ops.move_db`` with
    the 'SurroundingStates' target.  Postcodes for which no state could be
    determined are skipped.

    Args:
        db: database handle indexable by collection name.
        collection: name of the collection to audit.
    """
    # Geocoder long_name -> state name as stored in the database.
    state_names = {
        'Schleswig-Holstein': 'Schleswig-Holstein',
        'Niedersachsen': 'Niedersachsen',
        'Hamburg': 'Hamburg',
        'Lower Saxony': 'Niedersachsen',
    }
    max_attempts = 3

    pcs = db_ops.get_values(db, collection, ['address.postcode'])
    for pc in pcs:
        # One given postcode did not exist:
        if pc == '22701':
            pc = '22765'  # found at google maps
            # multi=True so every document with the bad postcode is fixed.
            db[collection].update(
                {'address.postcode': '22701'},
                {'$set': {'address.postcode': pc}},
                multi=True)
        # postcodes and states were cross-validated with the google maps API
        # NOTE(review): the query string contains leftover PHP text
        # ("urlencode($_REQUEST[...])"); it is kept unchanged -- confirm the
        # intended address format before altering it.
        url = 'http://maps.googleapis.com/maps/api/geocode/json?address=urlencode($_REQUEST[' + pc + ' Germany])&sensor=false'
        data = None
        attempts = 0
        success = False
        while not success and attempts < max_attempts:
            page = urllib.urlopen(url)
            try:
                data = json.loads(page.read())
            finally:
                page.close()
            attempts += 1

            if data['status'] == "OVER_QUERY_LIMIT":
                time.sleep(2)
                continue
            success = True

        if not success:
            # Only report the limit when every attempt was rejected.
            print("Daily limit has been reached")
            continue

        results = data.get('results') or []
        if not results:
            # Nothing to validate against for this postcode.
            continue

        # Reset per postcode so a value from a previous iteration is never
        # reused; the last matching component wins.
        state = None
        for component in results[0]['address_components']:
            long_name = component['long_name']
            if long_name in state_names:
                state = state_names[long_name]
        if state is None:
            continue

        # Set the state field, if not already given
        db[collection].update(
            {'address.postcode': pc, 'address.state': {'$exists': False}},
            {'$set': {'address.state': state}},
            multi=True)
        if state != 'Hamburg':
            db_ops.move_db(db, collection, 'SurroundingStates', 'address.postcode', pc)
コード例 #3
0
def audit_state(db, collection):
    """Normalise state values and pass every non-Hamburg state to move_db.

    Known abbreviations / translations of 'address.state' are rewritten via
    ``db_ops.update_db``; afterwards each remaining state value other than
    'Hamburg' is handed to ``db_ops.move_db`` with the 'SurroundingStates'
    target.

    Args:
        db: database handle passed through to db_ops.
        collection: name of the collection to audit.
    """
    field = 'address.state'
    # Read before the corrections; the result is replaced further down.
    states = db_ops.get_values(db, collection, [field])
    # One document had DE as state; validation showed that
    # Schleswig-Holstein is the correct value.
    # Be careful if using different data sets.
    corrections = {
        'manual': {
            'DE': 'Schleswig-Holstein',
            'NI': 'Niedersachsen',
            'Lower Saxony': 'Niedersachsen',
            'HH': 'Hamburg'
        }
    }
    db_ops.update_db(db, collection, field, corrections)
    states = db_ops.get_values(db, collection, [field])
    for name in states:
        if name == 'Hamburg':
            continue
        db_ops.move_db(db, collection, 'SurroundingStates', field, name)
コード例 #4
0
def audit_postcode(db, collection):
    """Validate postcodes against the Google geocoder and set missing states.

    Each 'address.postcode' value obtained from ``db_ops.get_values`` is
    looked up through the geocoding endpoint, retrying up to three times
    while the service answers OVER_QUERY_LIMIT.  When the first result names
    one of the expected states, documents with that postcode and no
    'address.state' receive it, and any postcode whose state is not Hamburg
    is passed to ``db_ops.move_db`` with the 'SurroundingStates' target.
    Postcodes without a usable geocoder answer are skipped.

    Args:
        db: database handle indexable by collection name.
        collection: name of the collection to audit.
    """
    # Geocoder long_name -> state name as stored in the database.
    state_names = {
        'Schleswig-Holstein': 'Schleswig-Holstein',
        'Niedersachsen': 'Niedersachsen',
        'Hamburg': 'Hamburg',
        'Lower Saxony': 'Niedersachsen',
    }
    max_attempts = 3

    pcs = db_ops.get_values(db, collection, ['address.postcode'])
    for pc in pcs:
        # One given postcode did not exist:
        if pc == '22701':
            pc = '22765'  # found at google maps
            # multi=True so every document with the bad postcode is fixed.
            db[collection].update({'address.postcode': '22701'},
                                  {'$set': {
                                      'address.postcode': pc
                                  }},
                                  multi=True)
        # postcodes and states were cross-validated with the google maps API
        # NOTE(review): the query string contains leftover PHP text
        # ("urlencode($_REQUEST[...])"); it is kept unchanged -- confirm the
        # intended address format before altering it.
        url = 'http://maps.googleapis.com/maps/api/geocode/json?address=urlencode($_REQUEST[' + pc + ' Germany])&sensor=false'
        data = None
        attempts = 0
        success = False
        while not success and attempts < max_attempts:
            page = urllib.urlopen(url)
            try:
                data = json.loads(page.read())
            finally:
                page.close()
            attempts += 1

            if data['status'] == "OVER_QUERY_LIMIT":
                time.sleep(2)
                continue
            success = True

        if not success:
            # Only report the limit when every attempt was rejected.
            print("Daily limit has been reached")
            continue

        results = data.get('results') or []
        if not results:
            # Nothing to validate against for this postcode.
            continue

        # Reset per postcode so a value from a previous iteration is never
        # reused; the last matching component wins.
        state = None
        for component in results[0]['address_components']:
            long_name = component['long_name']
            if long_name in state_names:
                state = state_names[long_name]
        if state is None:
            continue

        # Set the state field, if not already given
        db[collection].update(
            {
                'address.postcode': pc,
                'address.state': {
                    '$exists': False
                }
            }, {'$set': {
                'address.state': state
            }},
            multi=True)
        if state != 'Hamburg':
            db_ops.move_db(db, collection, 'SurroundingStates',
                           'address.postcode', pc)
コード例 #5
0
def audit_state(db, collection):
    """Apply manual state corrections, then route non-Hamburg states.

    The 'address.state' field is corrected through ``db_ops.update_db``
    using a fixed mapping; each state value read afterwards that is not
    'Hamburg' is passed to ``db_ops.move_db`` with the 'SurroundingStates'
    target.

    Args:
        db: database handle passed through to db_ops.
        collection: name of the collection to audit.
    """
    state_field = 'address.state'
    # Initial read; its result is superseded after the corrections.
    states = db_ops.get_values(db, collection, [state_field])

    # One document had DE as state; validation showed that
    # Schleswig-Holstein is the correct value.
    # Be careful if using different data sets.
    manual_fixes = {'DE': 'Schleswig-Holstein', 'NI': 'Niedersachsen',
                    'Lower Saxony': 'Niedersachsen', 'HH': 'Hamburg'}
    cor_state = {'manual': manual_fixes}
    db_ops.update_db(db, collection, state_field, cor_state)

    states = db_ops.get_values(db, collection, [state_field])
    # Lazy filter so values are consumed one at a time, as in a plain loop.
    for outside in (value for value in states if value != 'Hamburg'):
        db_ops.move_db(db, collection, 'SurroundingStates',
                       state_field, outside)
コード例 #6
0
def audit_city(db, collection):
    """Clean city names and cross-validate them with the Google geocoder.

    A fixed set of problematic city names is rewritten via
    ``db_ops.update_db``.  Each city value read beforehand is then looked up
    through the geocoding endpoint (up to three attempts while the service
    answers OVER_QUERY_LIMIT):

    * cities the geocoder does not know are appended to
      ``Output\\<collection>_unknown-cities.txt``;
    * if the first result carries a postal code and a document of that city
      has a different one, the postcode is overwritten;
    * cities whose state is not Hamburg are passed to ``db_ops.move_db``
      with the 'SurroundingStates' target.

    Args:
        db: database handle indexable by collection name.
        collection: name of the collection to audit.
    """
    cities = db_ops.get_values(db, collection, ['address.city'])
    # clean city names shown to have problem characters
    cities_to_clean = {'wrong_state': {'Barendorf, Kreis Lüneburg':'Barendorf',
                                        'Wintermoor a. d. Ch.':'Wintermoor an der Chaussee',
                                        'Moisburg/Hollenstedt':'Moisburg',
                                        'Lauenburg/Elbe':'Lauenburg Elbe'}}
    db_ops.update_db(db, collection, 'address.city', cities_to_clean)
    # cross-validate cities with state and postal codes
    for city in cities:
        # NOTE(review): the query string contains leftover PHP text and an
        # HTML-escaped '&amp;'; kept unchanged -- confirm before altering.
        url = 'http://maps.googleapis.com/maps/api/geocode/json?address=urlencode($_REQUEST["' + city.encode('utf8') + '" Germany])&sensor=false&amp;oe=utf-8'
        attempts = 0
        success = False
        while not success and attempts < 3:
            page = urllib.urlopen(url)
            try:
                data = json.loads(page.read())
            finally:
                page.close()
            attempts += 1

            if data['status'] == "OVER_QUERY_LIMIT":
                time.sleep(2)
                continue
            success = True

        if not success:
            # Only report the limit when every attempt was rejected; there
            # is no usable answer for this city.
            print("Daily limit has been reached")
            continue

        if data['status'] == 'ZERO_RESULTS':
            with open('Output\\' + collection + '_unknown-cities.txt', 'a') as f:
                f.write(unicode(city + '\n').encode("utf-8"))
            continue

        try:
            for component in data['results'][0]['address_components']:
                types = component['types']
                if types == ['postal_code']:
                    pc = component['long_name']
                    mismatch = {'address.city': city, 'address.postcode': {'$ne': pc}}
                    # find() always returns a cursor, so probe with
                    # find_one() to learn whether a mismatch really exists.
                    if db[collection].find_one(mismatch) is not None:
                        print(city, pc)
                        # NOTE(review): updates a single document per call;
                        # confirm whether all mismatching documents should
                        # be rewritten before adding multi=True.
                        db[collection].update(mismatch, {'$set': {'address.postcode': pc}})
                if types == ["administrative_area_level_1", "political"]:
                    state = component['long_name']
                    if state != 'Hamburg':
                        db_ops.move_db(db, collection, 'SurroundingStates', 'address.city', city)
        except Exception:
            # Best effort: dump the unexpected response and carry on, but let
            # KeyboardInterrupt / SystemExit propagate.
            pprint.pprint(data)
コード例 #7
0
def audit_city(db, collection):
    """Clean city names and cross-validate them with the Google geocoder.

    A fixed set of problematic city names is rewritten via
    ``db_ops.update_db``.  Each city value read beforehand is then looked up
    through the geocoding endpoint (up to three attempts while the service
    answers OVER_QUERY_LIMIT):

    * cities the geocoder does not know are appended to
      ``Output\\<collection>_unknown-cities.txt``;
    * if the first result carries a postal code and a document of that city
      has a different one, the postcode is overwritten;
    * cities whose state is not Hamburg are passed to ``db_ops.move_db``
      with the 'SurroundingStates' target.

    Args:
        db: database handle indexable by collection name.
        collection: name of the collection to audit.
    """
    cities = db_ops.get_values(db, collection, ['address.city'])
    # clean city names shown to have problem characters
    cities_to_clean = {
        'wrong_state': {
            'Barendorf, Kreis Lüneburg': 'Barendorf',
            'Wintermoor a. d. Ch.': 'Wintermoor an der Chaussee',
            'Moisburg/Hollenstedt': 'Moisburg',
            'Lauenburg/Elbe': 'Lauenburg Elbe'
        }
    }
    db_ops.update_db(db, collection, 'address.city', cities_to_clean)
    # cross-validate cities with state and postal codes
    for city in cities:
        # NOTE(review): the query string contains leftover PHP text and an
        # HTML-escaped '&amp;'; kept unchanged -- confirm before altering.
        url = 'http://maps.googleapis.com/maps/api/geocode/json?address=urlencode($_REQUEST["' + city.encode(
            'utf8') + '" Germany])&sensor=false&amp;oe=utf-8'
        attempts = 0
        success = False
        while not success and attempts < 3:
            page = urllib.urlopen(url)
            try:
                data = json.loads(page.read())
            finally:
                page.close()
            attempts += 1

            if data['status'] == "OVER_QUERY_LIMIT":
                time.sleep(2)
                continue
            success = True

        if not success:
            # Only report the limit when every attempt was rejected; there
            # is no usable answer for this city.
            print("Daily limit has been reached")
            continue

        if data['status'] == 'ZERO_RESULTS':
            with open('Output\\' + collection + '_unknown-cities.txt',
                      'a') as f:
                f.write(unicode(city + '\n').encode("utf-8"))
            continue

        try:
            for component in data['results'][0]['address_components']:
                types = component['types']
                if types == ['postal_code']:
                    pc = component['long_name']
                    mismatch = {
                        'address.city': city,
                        'address.postcode': {
                            '$ne': pc
                        }
                    }
                    # find() always returns a cursor, so probe with
                    # find_one() to learn whether a mismatch really exists.
                    if db[collection].find_one(mismatch) is not None:
                        print(city, pc)
                        # NOTE(review): updates a single document per call;
                        # confirm whether all mismatching documents should
                        # be rewritten before adding multi=True.
                        db[collection].update(
                            mismatch, {'$set': {
                                'address.postcode': pc
                            }})
                if types == ["administrative_area_level_1", "political"]:
                    state = component['long_name']
                    if state != 'Hamburg':
                        db_ops.move_db(db, collection, 'SurroundingStates',
                                       'address.city', city)
        except Exception:
            # Best effort: dump the unexpected response and carry on, but let
            # KeyboardInterrupt / SystemExit propagate.
            pprint.pprint(data)
コード例 #8
0
def audit_fixme(db, collection):
    """Merge all FIXME-style keys into 'fixme' and flag the affected documents.

    Each spelling variant of the fixme tag is renamed to 'fixme' in every
    document that has it.  ``db_ops.move_db`` is then called with the
    'NeedsFix' target for the 'fixme' field (presumably moving the flagged
    documents there -- confirm against db_ops).

    Args:
        db: database handle indexable by collection name.
        collection: name of the collection to audit.
    """
    keys = ['fixme', 'FIXME', 'FIXME2', 'fixme:de', 'note:FIXME', 'source:fixme']
    # keys[0] is the canonical name itself and needs no rename.
    for key in keys[1:]:
        # Without multi=True only a single document would be updated; the
        # $exists filter restricts the update to documents that have the key.
        db[collection].update(
            {key: {'$exists': True}},
            {'$rename': {key: "fixme"}},
            multi=True)
    db_ops.move_db(db, collection, 'NeedsFix', 'fixme', None)