def test_verify_zip_both(fake_requests_get):
    """A nine-digit ``zip_code`` without a separator is sent as Zip5 plus Zip4."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1+%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%3E20770%3C%2FZip5%3E%3CZip4%3E1441%3C%2FZip4%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response_xml = """<?xml version="1.0"?>
<AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address></AddressValidateResponse>"""

    # The fake only answers the exact URL above, so the request encoding is
    # checked implicitly by the call succeeding.
    fake_response = fudge.Fake('Response').has_attr(content=response_xml)
    fake_requests_get.expects_call().with_args(expected_url).returns(fake_response)

    query = OrderedDict([
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
        ('zip_code', '207701441'),
    ])
    verified = verify('foo_id', query)

    eq(
        verified,
        OrderedDict([
            ('address', '6406 IVY LN'),
            ('city', 'GREENBELT'),
            ('state', 'MD'),
            ('zip5', '20770'),
            ('zip4', '1441'),
        ]),
    )
def test_verify_zip_dash(fake_urlopen):
    """A ``zip_code`` written as ``ZIP5-ZIP4`` is sent as separate Zip5 and Zip4."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%3E20770%3C%2FZip5%3E%3CZip4%3E1441%3C%2FZip4%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response = StringIO(u"""<?xml version="1.0"?>
<AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address></AddressValidateResponse>""")

    # The fake only accepts the exact URL above and hands back the canned XML.
    fake_urlopen.expects_call().with_args(expected_url).returns(response)

    query = OrderedDict([
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
        ('zip_code', '20770-1441'),
    ])
    verified = verify('foo_id', query)

    eq(
        verified,
        OrderedDict([
            ('address', '6406 IVY LN'),
            ('city', 'GREENBELT'),
            ('state', 'MD'),
            ('zip5', '20770'),
            ('zip4', '1441'),
        ]),
    )
def test_verify_zip_both(fake_urlopen):
    """A nine-digit ``zip_code`` without a separator is sent as Zip5 plus Zip4."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%3E20770%3C%2FZip5%3E%3CZip4%3E1441%3C%2FZip4%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response = StringIO("""<?xml version="1.0"?>
<AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address></AddressValidateResponse>""")

    # The fake only accepts the exact URL above and hands back the canned XML.
    fake_urlopen.expects_call().with_args(expected_url).returns(response)

    query = OrderedDict([
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
        ('zip_code', '207701441'),
    ])
    verified = verify('foo_id', query)

    eq(
        verified,
        OrderedDict([
            ('address', '6406 IVY LN'),
            ('city', 'GREENBELT'),
            ('state', 'MD'),
            ('zip5', '20770'),
            ('zip4', '1441'),
        ]),
    )
    def usps_validate(self):
        """Verify the mapped address with USPS and record the outcome.

        Sends the two entries of ``self.mapped_lines`` to
        ``address_information.verify`` as ``address`` and ``city`` and then
        calls exactly one of ``self.failed``, ``self.matched_partial`` or
        ``self.matched``.
        """
        line_1, line_2 = self.mapped_lines

        key = settings.USPS_API_KEY

        data = {'address': line_1, 'city': line_2}

        try:
            result = address_information.verify(key, data)
        except ValueError as e:
            # str(e), not e.message: exceptions have no ``message`` attribute
            # on Python 3, so the old spelling raised AttributeError here.
            self.failed(str(e))
            return

        returntext = result.get('returntext')
        if returntext:
            self.matched_partial(result, "(USPS): " + returntext)

        # Good USPS match, partial Google match
        elif self.status == MAPPED_PARTIAL:
            self.matched_partial(result, self.message)

        else:
            assert (self.status == MAPPED)  # Belt and suspenders
            self.matched(result,
                         "Address is fully matched and is deliverable.")
    def usps_validate(self):
        """Verify the mapped address with USPS and record the outcome.

        The two entries of ``self.mapped_lines`` are submitted to
        ``address_information.verify`` as ``address`` and ``city``. Exactly
        one of ``self.failed``, ``self.matched_partial`` or ``self.matched``
        is called afterwards.
        """
        line_1, line_2 = self.mapped_lines

        key = settings.USPS_API_KEY

        data = {
            'address': line_1,
            'city': line_2
        }

        try:
            result = address_information.verify(key, data)
        except ValueError as e:
            # Exceptions lost the ``message`` attribute in Python 3; str(e)
            # gives the same text without raising AttributeError.
            self.failed(str(e))
            return

        returntext = result.get('returntext')
        if returntext:
            self.matched_partial(result, "(USPS): " + returntext)

        # Good USPS match, partial Google match
        elif self.status == MAPPED_PARTIAL:
            self.matched_partial(result, self.message)

        else:
            assert (self.status == MAPPED)  # Belt and suspenders
            self.matched(result, "Address is fully matched and is deliverable.")
Esempio n. 6
0
def normalize_address(address):
    """Normalize a domestic (US) address

    Returns a ``(normalized, changed)`` tuple:

    * ``(address, False)`` when ``address['country']`` is not ``'US'``; the
      input object is returned untouched and no lookup is made.
    * ``(norm_addr, changed)`` after a successful lookup. ``norm_addr`` holds
      the fields named by ``USPS_ADDRESS_KEYS`` plus ``country``; ``changed``
      is True when any of those fields differs from the input, ignoring case.
    * ``(None, True)`` when ``address_information.verify`` raises
      ``ValueError``.
    """
    if address['country'] != 'US':
        return address, False

    addr = {
        'zip_code': address['postal_code'],
        'state': address['state'],
        'city': address['city'],
        'address': address['line1']
    }
    line2 = address.get('line2')
    if line2:
        addr['address_extended'] = line2

    # Only the lookup itself is guarded, so an unrelated bug below cannot be
    # mistaken for a rejected address.
    try:
        usps_addr = address_information.verify(current_app.config['USPS_USER_ID'], addr)
    except ValueError:
        return None, True

    norm_addr = {}
    changed = False
    for k_frm, k_to in USPS_ADDRESS_KEYS.items():
        norm_addr[k_to] = usps_addr.get(k_frm)
        # address.get(): optional input fields (line2 is read with .get above)
        # may be absent altogether; treat a missing field like an empty one.
        if (norm_addr[k_to] or '').lower() != (address.get(k_to) or '').lower():
            changed = True
    norm_addr['country'] = 'US'
    return norm_addr, changed
Esempio n. 7
0
def _save_venues(
    events,
    events_coll,
    usps_id,
    now,
    ):
    # Don't waste a call to the USPS API
    if not events:
        return

    venues = [event['facebook']['venue'] for event in events]
    usps_venues = [
        OrderedDict([
                ('address', venue['street']),
                ('city', venue['city']),
                ('state', venue['state']),
                ])
        for venue in venues
        ]
    matches = address_information.verify(
        usps_id,
        *usps_venues
        )
    # TODO fugly
    if len(usps_venues) == 1:
        matches = [matches]
    for (event,match) in zip(events,matches):
        if isinstance(match, ValueError):
            _mark_as_failed(
                events_coll=events_coll,
                event_id=event['_id'],
                now=now,
                field='normalization_failed',
                reason=str(match),
                )
            continue

        match['country'] = 'US'
        save = OrderedDict([
            ('normalized', match),
            ('ubernear.normalization_completed', now),
            ('ubernear.normalization_source', 'usps'),
            ])

        log.debug(
            'Storing normalized venue for {event_id}'.format(
                event_id=event['_id'],
                )
            )
        mongo.save_no_replace(
            events_coll,
            _id=event['_id'],
            save=save,
            )
def gen_standardize_address(addr1, addr2, key, results, usps_key):
    """Store a USPS-standardized one-line address in ``results[key]``.

    ``addr1`` is sent as the street address and ``addr2`` as the city, with
    the state fixed to ``'NY'``. On any failure the fallback
    ``"<addr1>, <addr2>"`` is stored instead.
    """
    query = {'address': addr1, 'city': addr2, 'state': 'NY'}
    try:
        verified = address_information.verify(usps_key, query)
        # Append "-ZIP4" only when the response carries a non-empty zip4.
        if ('zip4' in verified) and verified['zip4']:
            plus4 = "-{}".format(verified['zip4'])
        else:
            plus4 = ''
        results[key] = "{}, {} {} {}{}".format(
            verified['address'],
            verified['city'],
            verified['state'],
            verified['zip5'],
            plus4,
        )
    except Exception:
        # Best effort: keep the caller's original text when the lookup fails.
        results[key] = "{}, {}".format(addr1, addr2)
Esempio n. 9
0
def address_lookup(batch, usps_key):
    """Standardize the contributor addresses in ``batch`` through USPS.

    Args:
        batch: sequence of row dicts carrying ``ContributorAddr1`` (street)
            and ``ContributorAddr2`` (city followed by more text), or
            ``None``. ``None`` entries inside the sequence are skipped.
        usps_key: user id handed to ``address_information.verify``.

    Returns:
        A list with one shallow copy per non-``None`` row, in input order.
        Where the lookup produced a usable match, ``ContributorAddr1`` and
        ``ContributorAddr2`` hold the standardized values; otherwise the copy
        keeps the row's original values. ``[]`` when ``batch`` is ``None``.
    """
    # Form donor addresses
    logging.info("address_lookup processing batch: {}".format(str(batch)))
    if batch is None:
        return []

    post_data = []
    # Batch index of each post_data entry. Rows that cannot be turned into a
    # query are left out of the request, so results must be mapped back by
    # index instead of by position or later rows get the wrong address.
    sent_rows = []
    for i, row in enumerate(batch):
        try:
            addr1 = row['ContributorAddr1']
            addr2 = row['ContributorAddr2']
            # City is whatever precedes the first comma, else the first word.
            city = addr2.split(',' if ',' in addr2 else ' ')[0]
        except Exception as e:
            logging.error(
                "Could not append to post_data in address_lookup: {}".format(
                    e))
            continue
        post_data.append({'address': addr1, 'city': city, 'state': 'NY'})
        sent_rows.append(i)

    # Submit batch to API (skipped entirely when there is nothing to send)
    recv_by_row = {}
    if post_data:
        try:
            recv_data = address_information.verify(usps_key, *post_data)
        except Exception as e:
            # There was only one entry in the batch, and it failed
            logging.error(
                "Caught exception posting to address_information.verify: {}".
                format(e))
        else:
            # A single-address call returns the match itself, not a list.
            if len(post_data) == 1:
                recv_data = [recv_data]
            recv_by_row = dict(zip(sent_rows, recv_data))

    # Match
    output = []
    for i, row in enumerate(batch):
        if row is None:
            continue
        out_dct = row.copy()
        # Try and use formatted address
        try:
            match = recv_by_row[i]
            out_dct['ContributorAddr1'] = match['address']
            zip5 = match['zip5']
            if isinstance(zip5, int):
                # So defensive
                zip5 = "{:0.0f}".format(zip5)
            out_dct['ContributorAddr2'] = "{}, {} {}".format(
                match['city'], match['state'], zip5)
        except Exception as e:
            # No result for this row, or output from pyusps is Exception not
            # dict, etc.
            logging.error(
                "Caught exception building out_dct in address_lookup: {}".
                format(e))
        output.append(out_dct)
    return output
def test_verify_multiple(fake_requests_get):
    """Two addresses in one call come back as a list of matches, in order."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1+%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1+%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ECT%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response_xml = """<?xml version="1.0"?>
<AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Address2>8 WILDWOOD DR</Address2><City>OLD LYME</City><State>CT</State><Zip5>06371</Zip5><Zip4>1844</Zip4></Address></AddressValidateResponse>"""

    # The fake only answers the exact URL above.
    fake_response = fudge.Fake('Response').has_attr(content=response_xml)
    fake_requests_get.expects_call().with_args(expected_url).returns(fake_response)

    greenbelt = OrderedDict([
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ])
    old_lyme = OrderedDict([
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'CT'),
    ])
    verified = verify('foo_id', greenbelt, old_lyme)

    eq(
        verified,
        [
            OrderedDict([
                ('address', '6406 IVY LN'),
                ('city', 'GREENBELT'),
                ('state', 'MD'),
                ('zip5', '20770'),
                ('zip4', '1441'),
            ]),
            OrderedDict([
                ('address', '8 WILDWOOD DR'),
                ('city', 'OLD LYME'),
                ('state', 'CT'),
                ('zip5', '06371'),
                ('zip4', '1844'),
            ]),
        ],
    )
def test_verify_api_address_error_multiple(fake_requests_get):
    """A per-address API error is returned as a ``ValueError`` in its slot."""
    expected_url = """http://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1+%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1+%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ENJ%3C%2FState%3E%3CZip5+%2F%3E%3CZip4+%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response_xml = """<?xml version="1.0"?>
<AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Error><Number>-2147219400</Number><Source>API_AddressCleancAddressClean.CleanAddress2;SOLServer.CallAddressDll</Source><Description>Invalid City.</Description><HelpFile></HelpFile><HelpContext>1000440</HelpContext></Error></Address></AddressValidateResponse>"""

    # The fake only answers the exact URL above.
    fake_response = fudge.Fake('Response').has_attr(content=response_xml)
    fake_requests_get.expects_call().with_args(expected_url).returns(fake_response)

    valid_query = OrderedDict([
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ])
    # Old Lyme paired with NJ is the entry the canned response rejects.
    invalid_query = OrderedDict([
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'NJ'),
    ])
    verified = verify('foo_id', valid_query, invalid_query)

    # eq does not work with exceptions. Process each item manually.
    eq(len(verified), 2)
    first_match, second_match = verified[0], verified[1]
    eq(
        first_match,
        OrderedDict([
            ('address', '6406 IVY LN'),
            ('city', 'GREENBELT'),
            ('state', 'MD'),
            ('zip5', '20770'),
            ('zip4', '1441'),
        ]),
    )
    assert_errors_equal(
        second_match,
        ValueError('-2147219400: Invalid City.'),
    )
Esempio n. 12
0
def test_verify_multiple(fake_urlopen):
    """Two addresses in one call come back as a list of matches, in order."""
    expected_url = """https://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ECT%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response = StringIO(u"""<?xml version="1.0"?>
<AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Address2>8 WILDWOOD DR</Address2><City>OLD LYME</City><State>CT</State><Zip5>06371</Zip5><Zip4>1844</Zip4></Address></AddressValidateResponse>""")

    # The fake only accepts the exact URL above and hands back the canned XML.
    fake_urlopen.expects_call().with_args(expected_url).returns(response)

    greenbelt = OrderedDict([
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ])
    old_lyme = OrderedDict([
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'CT'),
    ])
    verified = verify('foo_id', greenbelt, old_lyme)

    eq(
        verified,
        [
            OrderedDict([
                ('address', '6406 IVY LN'),
                ('city', 'GREENBELT'),
                ('state', 'MD'),
                ('zip5', '20770'),
                ('zip4', '1441'),
            ]),
            OrderedDict([
                ('address', '8 WILDWOOD DR'),
                ('city', 'OLD LYME'),
                ('state', 'CT'),
                ('zip5', '06371'),
                ('zip4', '1844'),
            ]),
        ],
    )
Esempio n. 13
0
def test_verify_api_address_error_multiple(fake_urlopen):
    """A per-address API error is returned as a ``ValueError`` in its slot."""
    expected_url = """https://production.shippingapis.com/ShippingAPI.dll?API=Verify&XML=%3CAddressValidateRequest+USERID%3D%22foo_id%22%3E%3CAddress+ID%3D%220%22%3E%3CAddress1%2F%3E%3CAddress2%3E6406+Ivy+Lane%3C%2FAddress2%3E%3CCity%3EGreenbelt%3C%2FCity%3E%3CState%3EMD%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3CAddress+ID%3D%221%22%3E%3CAddress1%2F%3E%3CAddress2%3E8+Wildwood+Drive%3C%2FAddress2%3E%3CCity%3EOld+Lyme%3C%2FCity%3E%3CState%3ENJ%3C%2FState%3E%3CZip5%2F%3E%3CZip4%2F%3E%3C%2FAddress%3E%3C%2FAddressValidateRequest%3E"""
    response = StringIO(u"""<?xml version="1.0"?>
<AddressValidateResponse><Address ID="0"><Address2>6406 IVY LN</Address2><City>GREENBELT</City><State>MD</State><Zip5>20770</Zip5><Zip4>1441</Zip4></Address><Address ID="1"><Error><Number>-2147219400</Number><Source>API_AddressCleancAddressClean.CleanAddress2;SOLServer.CallAddressDll</Source><Description>Invalid City.</Description><HelpFile></HelpFile><HelpContext>1000440</HelpContext></Error></Address></AddressValidateResponse>""")

    # The fake only accepts the exact URL above and hands back the canned XML.
    fake_urlopen.expects_call().with_args(expected_url).returns(response)

    valid_query = OrderedDict([
        ('address', '6406 Ivy Lane'),
        ('city', 'Greenbelt'),
        ('state', 'MD'),
    ])
    # Old Lyme paired with NJ is the entry the canned response rejects.
    invalid_query = OrderedDict([
        ('address', '8 Wildwood Drive'),
        ('city', 'Old Lyme'),
        ('state', 'NJ'),
    ])
    verified = verify('foo_id', valid_query, invalid_query)

    # eq does not work with exceptions. Process each item manually.
    eq(len(verified), 2)
    first_match, second_match = verified[0], verified[1]
    eq(
        first_match,
        OrderedDict([
            ('address', '6406 IVY LN'),
            ('city', 'GREENBELT'),
            ('state', 'MD'),
            ('zip5', '20770'),
            ('zip4', '1441'),
        ]),
    )
    assert_errors_equal(
        second_match,
        ValueError('-2147219400: Invalid City.'),
    )
Esempio n. 14
0
def vf_standardize_address(row, results, usps_key):
    """Used for the NY State Voter File only.

    Builds an upper-cased street address from the row's ``R*`` address
    columns (null values become empty strings), asks USPS to verify it with
    ``row['RCITY']`` and state ``'NY'``, and stores the outcome in
    ``results[row['SBOEID']]``:

    * on success: ``"<address>, <city> <state> <zip5>[-<zip4>]"``;
    * on any failure: the constructed upper-cased address.
    """
    def _blank_if_null(value):
        # Null cells (None/NaN) are treated as empty text.
        return '' if pd.isnull(value) else value

    rhalfcode = _blank_if_null(row['RHALFCODE'])
    raddnumber = _blank_if_null(row['RADDNUMBER'])
    rpredirection = _blank_if_null(row['RPREDIRECTION'])
    rstreetname = _blank_if_null(row['RSTREETNAME'])
    rpostdirection = _blank_if_null(row['RPOSTDIRECTION'])
    rapartment = _blank_if_null(row['RAPARTMENT'])

    # Decide on the *normalized* apartment value. Testing the raw cell sent a
    # null apartment down the "APT" branch and produced a dangling "APT ".
    apartment_text = str(rapartment).upper()
    if ('APT' in apartment_text) \
            or ('UNIT' in apartment_text) \
            or (rapartment == ''):
        template = "{} {} {} {} {} {}"
    else:
        # Bare apartment identifiers get an explicit "APT" designator.
        template = "{} {} {} {} {} APT {}"
    address = template.format(
        raddnumber,
        rhalfcode,
        rpredirection,
        rstreetname,
        rpostdirection,
        rapartment).upper()

    try:
        addr = {'address': address, 'city': row['RCITY'], 'state': 'NY'}
        result = address_information.verify(usps_key, addr)
        # .get(): a match without a zip4 entry is still a good match.
        zip4 = "-{}".format(result['zip4']) if result.get('zip4') else ''
        results[row['SBOEID']] = "{}, {} {} {}{}".format(
            result['address'], result['city'], result['state'], result['zip5'], zip4)
    except Exception:
        # Best effort: fall back on the address we built ourselves.
        results[row['SBOEID']] = address
from pyusps import address_information
from csv import reader
#import re

# Verified addresses, in input order; rows that fail verification are
# reported on stdout and left out.
addresses = list()

# Start output.txt afresh with just the header line.
with open('output.txt', 'w') as f:
    f.write('City\tState\tAddress\tZip Code\n')

with open('addresses.txt', 'r') as f:
    r = reader(f, delimiter='\t')
    # Skip the first row.
    next(r)
    for row in r:
        city, state, addr, zip_code = row
        address = {
            'address': addr,
            'city': city,
            'state': state,
            'zip_code': zip_code,
        }
        try:
            addresses.append(address_information.verify("033NONE01173", address))
        except Exception:
            # Exception rather than a bare except, so Ctrl-C still stops the run.
            print("Check address " + address['address'])
        print(address)

# Open the output once for all rows; the previous per-row
# open(...).write(...) never closed its file handles.
# NOTE(review): rows are written as address/city/state/zip although the
# header announces City/State/Address/Zip Code -- confirm which is intended.
with open('output.txt', 'a') as out:
    for address in addresses:
        try:
            addr = "\t".join([
                address['address'],
                address['city'],
                address['state'],
                address['zip5'],
            ]) + "-" + address['zip4']
            print(addr)
            out.write(addr + '\n')
        except Exception:
            print("There was an error")
Esempio n. 16
0
 def verify_with_usps(self, addresses):
     """Verify ``addresses`` with USPS; return the result, or False on error.

     ``addresses`` is unpacked into ``address_information.verify`` after
     ``self.usps_id``.
     """
     ### Needs to be in a try block for the usps verify method to not raise an error
     try:
         return address_information.verify(self.usps_id, *addresses)
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are no longer swallowed.
         return False
Esempio n. 17
0
def standardize_address(batch, usps_key):
    """Standardize the voter addresses in ``batch`` through USPS.

    Args:
        batch: sequence of voter row dicts, or ``None``. The keys read are
            ``RADDNUMBER``, ``RSTREETNAME``, ``RAPARTMENT``, ``RCITY``,
            ``RZIP5``, ``LASTNAME`` and ``FIRSTNAME``. ``None`` entries
            inside the sequence are skipped.
        usps_key: user id handed to ``address_information.verify``.

    Returns:
        A list with one shallow copy per non-``None`` row, each extended with
        ``voter_addr1``, ``voter_addr2`` and ``match_string``. The address
        fields come from USPS when a usable match exists, otherwise from the
        locally constructed address (``voter_addr1`` is ``None`` when even
        that could not be built). ``[]`` when ``batch`` is ``None``.
    """
    if batch is None:
        return []

    # Form voter addresses: one slot per batch row, None where it failed.
    constructed = []
    for row in batch:
        try:
            raddnumber = row['RADDNUMBER'].strip() if row['RADDNUMBER'] else ""
            rstreetname = row['RSTREETNAME'].strip(
            ) if row['RSTREETNAME'] else ""
            rapartment = row['RAPARTMENT']
            if not rapartment:
                rapartment = ""
            elif not any(tag in rapartment for tag in ('APT', 'UNIT', 'PH')):
                # Bare apartment identifiers get an explicit designator.
                rapartment = "APT {}".format(rapartment)
            constructed.append({
                'address': u" ".join([raddnumber, rstreetname, rapartment]),
                'city': row['RCITY'],
                'state': 'NY'
            })
        except Exception as e:
            # logging.info, not logging.Info: the latter does not exist and
            # used to crash the whole batch from inside this handler.
            logging.info(
                "Could not form address in standardize_address, error: {}".
                format(e))
            constructed.append(None)

    # Submit batch to API. Only real addresses are sent; the None
    # placeholders stay local and results are mapped back by batch index.
    sent_rows = [i for i, addr in enumerate(constructed) if addr is not None]
    post_data = [constructed[i] for i in sent_rows]
    recv_by_row = {}
    if post_data:
        try:
            recv_data = address_information.verify(usps_key, *post_data)
        except Exception as e:
            logging.error(
                "Caught exception posting to standardize_address: {}".format(e))
        else:
            # A single-address call returns the match itself, not a list.
            if len(post_data) == 1:
                recv_data = [recv_data]
            recv_by_row = dict(zip(sent_rows, recv_data))

    # Match
    output = []
    for i, row in enumerate(batch):
        if row is None:
            continue
        out_dct = row.copy()

        # Try and use formatted address
        try:
            match = recv_by_row[i]
            addr1 = match['address']
            zip5 = match['zip5']
            if isinstance(zip5, int):
                # So defensive
                zip5 = "{:0.0f}".format(zip5)
            addr2 = "{}, {} {}".format(match['city'], match['state'], zip5)
        except Exception:
            # No result for this row, or output from pyusps is Exception not
            # dict, etc.; fall back on constructed string
            own = constructed[i]
            addr1 = own['address'] if own is not None else None
            addr2 = "{}, NY {}".format(row['RCITY'], row['RZIP5'])
        out_dct['voter_addr1'] = addr1
        out_dct['voter_addr2'] = addr2

        # Form match string, whatever happens
        out_dct['match_string'] = "{} {}, {}, {}".format(
            out_dct['LASTNAME'].strip(), out_dct['FIRSTNAME'].strip(),
            out_dct['voter_addr1'], out_dct['voter_addr2'])
        output.append(out_dct)
    return output
Esempio n. 18
0
def main():
    """Import places from a CSV file, normalizing each address through USPS.

    Command-line options ``--csv``, ``--config`` and ``--db-config`` are all
    required; ``-v/--verbose`` switches logging from INFO to DEBUG.

    For every CSV row the non-empty fields are stored under ``info``.
    Coordinates that parse and fall inside the accepted range are stored
    under ``ubernear.location``. When the USPS lookup succeeds, its result
    is stored under ``normalized``. Rows without an address are skipped.
    Indices on the places collection are created at the end.
    """
    parser = optparse.OptionParser(
        usage='%prog [OPTS]',
        )
    parser.add_option(
        '-v', '--verbose',
        help='Verbose mode [default %default]',
        action="store_true", dest="verbose"
        )
    parser.add_option(
        '--csv',
        help='Path to the CSV file containing the places to import',
        metavar='PATH',
        )
    parser.add_option(
        '--config',
        help=('Path to the config file with information on how to '
              'import places'
              ),
        metavar='PATH',
        )
    parser.add_option(
        '--db-config',
        help=('Path the to file with information on how to '
              'retrieve and store data in the database'
              ),
        metavar='PATH',
        )
    parser.set_defaults(
        verbose=False,
        )

    options, args = parser.parse_args()
    # Everything is passed as options; positional arguments are an error.
    if args:
        parser.error('Wrong number of arguments.')

    if options.csv is None:
        parser.error('Missing option --csv=.')
    if options.config is None:
        parser.error('Missing option --config=.')
    if options.db_config is None:
        parser.error('Missing option --db-config=.')

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
        )

    places_csv = absolute_path(options.csv)
    config = config_parser(options.config)
    coll = collections(options.db_config)
    places_coll = coll['places-collection']

    usps_id = config.get('usps', 'user_id')

    delimiter = config.get('csv', 'delimiter')
    # Lets the config spell the delimiter as an escape sequence.
    delimiter = delimiter.decode('string-escape')
    fieldnames = [
        'id',
        'name',
        'address',
        'address_extended',
        'po_box',
        'locality',
        'region',
        'country',
        'postcode',
        'tel',
        'fax',
        'category',
        'website',
        'email',
        'latitude',
        'longitude',
        'status',
        ]

    log.info('Start...')

    with open(places_csv, 'rb') as places_fp:
        places = csv.DictReader(
            places_fp,
            delimiter=delimiter,
            fieldnames=fieldnames,
            )
        for place in places:
            # Don't store empty fields
            save = defaultdict(dict)
            for k,v in place.iteritems():
                if v != '':
                    save['info'][k] = v

            try:
                lat = float(save['info']['latitude'])
                lng = float(save['info']['longitude'])
            except (KeyError, ValueError):
                log.debug(
                    'Did not find a valid latitude and longitude for place '
                    '{_id}'.format(
                        _id=save['info']['id'],
                        )
                    )
            else:
                save['info']['latitude'] = lat
                save['info']['longitude'] = lng
                # Coordinates are always stored in the form [lng,lat],
                # in that order. Anything else might result in incorrect
                # MongoDB Geospatial queries.
                save['ubernear.location'] = [lng, lat]

                error_msg = ('Bad coordinates (lng,lat) {coord} for id '
                             '{_id}'
                             )
                error_msg = error_msg.format(
                    coord=(lng, lat),
                    _id=save['info']['id']
                    )
                # Out-of-range coordinates are logged and dropped again.
                if (lng < -180 or lng >= 180) or (lat < -90 or lat > 90):
                    log.error(error_msg)
                    del save['info']['latitude']
                    del save['info']['longitude']
                    del save['ubernear.location']

            if 'address' not in save['info']:
                log.error(
                    'Found place {_id} with no address information. '
                    'Skipping'.format(
                        _id=save['info']['id'],
                        )
                    )
                continue
            # The ZIP goes under 'zip_code', the key the other verify()
            # callers use; the previous 'zipcode' spelling meant the postcode
            # was never part of the lookup.
            match = dict([
                    ('address', save['info']['address']),
                    ('city', save['info']['locality']),
                    ('state', save['info']['region']),
                    ('zip_code', save['info']['postcode']),
                    ])
            if 'address_extended' in save['info']:
                match['address_extended'] = save['info']['address_extended']
            try:
                norm = address_information.verify(usps_id, match)
            except Exception:
                # Exception rather than a bare except, so Ctrl-C still
                # interrupts a long import.
                log.error(
                    'The USPS API could not find an address for place '
                    '{_id}'.format(
                        _id=save['info']['id'],
                        )
                    )
            else:
                norm['name'] = save['info']['name'].upper()
                norm['country'] = 'US'
                save['normalized'] = norm
                save['ubernear.normalization_source'] = 'usps'

            # The place is stored whether or not normalization succeeded.
            save['ubernear.source'] = 'factual'
            mongo.save_no_replace(
                places_coll,
                _id=save['info']['id'],
                save=save,
                )

    indices = [
        {'ubernear.location': pymongo.GEO2D},
        {'ubernear.last_checked': pymongo.ASCENDING},
        ]
    mongo.create_indices(
        collection=places_coll,
        indices=indices,
        )

    log.info('End')