def test_exact_match_addr2_cat(self, mock_connect):
    """
    Test for exact match when address line 2 is concatenated with
    address line 1 in the DB.

    The mocked row stores '123 MAIN STREET UNIT 10' entirely in the first
    address column, while the lookup passes 'UNIT 10' separately as
    ``addr2``.

    :param mock_connect: mock handed to ``setup_mock_connect`` together
        with the test rows
    """
    test_data = [('XX3X', 'JOHN', 'SMITH', '123 MAIN STREET UNIT 10', '',
                  'PORTLAND', 'OR', '97202')]
    setup_mock_connect(mock_connect=mock_connect, test_data=test_data)

    from openelections.donor_lookup.match import get_match
    matches = get_match(last_name='Smith',
                        first_name='John',
                        addr1='123 main street',
                        addr2='UNIT 10',
                        zip_code='97202',
                        city='Portland')

    # No weak or strong matches.
    # This must be the boolean `and`: the bitwise `&` binds tighter than
    # `==`, so `x == 0 & y == 0` is the chain `x == (0 & y) == 0` and the
    # size of the 'strong' bucket would never actually be checked.
    assert matches['weak'].size == 0 and matches['strong'].size == 0

    # One exact match
    assert matches['exact'].size == 1

    # Scores correct
    ematch = matches['exact'][0]
    assert ematch['address_sim'] == 1 and ematch[
        'first_name_sim'] == 1 and ematch['last_name_sim'] == 1
def test_max_return(self, mock_connect):
    """
    Test that ``max_num_matches`` caps the number of returned matches.

    Three rows are mocked, the lookup asks for at most two matches, and
    the two returned weak matches are checked.

    :param mock_connect: mock handed to ``setup_mock_connect`` together
        with the test rows
    """
    db_rows = [
        ('XX3X', 'JOHNS', 'SMITH', '523 NE LUMBAR ST', '', 'PORTLAND', 'OR',
         '97202'),
        ('XX3Y', 'JOHNS', 'SMITH', '153 NW LUMBARR ST', '', 'PORTLAND', 'OR',
         '97202'),
        ('XX3Z', 'JOHN', 'SMITH', '125 NW LUMBAR ST', '', 'PORTLAND', 'OR',
         '97202'),
    ]
    setup_mock_connect(mock_connect=mock_connect, test_data=db_rows)

    from openelections.donor_lookup.match import get_match
    query = dict(last_name='Smith',
                 first_name='John',
                 addr1='123 lumbar street',
                 zip_code='97202',
                 city='Portland',
                 max_num_matches=2)
    matches = get_match(**query)

    # No strong or exact matches
    for bucket in ('strong', 'exact'):
        assert matches[bucket].size == 0

    # Exactly two weak matches (capped by max_num_matches)
    weak = matches['weak']
    assert weak.size == 2

    # The right two rows were selected, in the expected order
    top = weak[0]
    assert top['first_name'] == 'JOHN'
    assert top['last_name'] == 'SMITH'
    assert weak[1]['address_1'] == '523 NE LUMBAR ST'
def test_strong_match_abr(self, mock_connect):
    """
    Test for a strong match: names agree exactly, the address is close
    but not identical ('lumbarr street' vs. 'NW LUMBAR ST').

    :param mock_connect: mock handed to ``setup_mock_connect`` together
        with the test rows
    """
    db_rows = [('XX3X', 'JOHN', 'SMITH', '123 NW LUMBAR ST', '', 'PORTLAND',
                'OR', '97202')]
    setup_mock_connect(mock_connect=mock_connect, test_data=db_rows)

    from openelections.donor_lookup.match import get_match
    query = dict(last_name='Smith',
                 first_name='John',
                 addr1='123 lumbarr street',
                 zip_code='97202',
                 city='Portland')
    matches = get_match(**query)

    # No weak or exact matches
    for bucket in ('weak', 'exact'):
        assert matches[bucket].size == 0

    # One strong match
    strong = matches['strong']
    assert strong.size == 1

    # Scores correct: near-perfect address similarity, perfect name scores
    smatch = strong[0]
    assert np.isclose(smatch['address_sim'], 0.97435)
    assert smatch['first_name_sim'] == 1
    assert smatch['last_name_sim'] == 1
def test_exact_match_abr(self, mock_connect):
    """
    Test for exact match with abbreviations and words to ignore
    ('123 main street' looked up against '123 NW MAIN ST').

    :param mock_connect: mock handed to ``setup_mock_connect`` together
        with the test rows
    """
    db_rows = [('XX3X', 'JOHN', 'SMITH', '123 NW MAIN ST', '', 'PORTLAND',
                'OR', '97202')]
    setup_mock_connect(mock_connect=mock_connect, test_data=db_rows)

    from openelections.donor_lookup.match import get_match
    query = dict(last_name='Smith',
                 first_name='John',
                 addr1='123 main street',
                 zip_code='97202',
                 city='Portland')
    matches = get_match(**query)

    # No weak or strong matches
    for bucket in ('weak', 'strong'):
        assert matches[bucket].size == 0

    # One exact match
    exact = matches['exact']
    assert exact.size == 1

    # Scores correct: every similarity score is a perfect 1
    ematch = exact[0]
    for score in ('address_sim', 'first_name_sim', 'last_name_sim'):
        assert ematch[score] == 1
def get(self) -> dict:
    """
    Get donor match

    Parses the donor query arguments, looks the donor up with
    ``get_match`` and returns the matches grouped by match type, plus the
    echoed donor information.

    :return: dict holding, for every match type returned by ``get_match``,
        a list of records whose field values are converted to ``str``, and
        a ``donor_info`` entry with the upper-cased request arguments and
        the ``eligible_address`` flag computed by ``in_portland``
    :raises Exception: any error raised while handling the request is
        printed to stdout and then re-raised
    """
    try:
        aparser = reqparse.RequestParser()
        aparser.add_argument("last_name", type=str, required=True)
        aparser.add_argument("first_name", type=str, required=True)
        aparser.add_argument("zip_code", type=str, required=True)
        aparser.add_argument("addr1", type=str, required=True)
        aparser.add_argument("addr2", default=None, type=str)
        aparser.add_argument("city", default=None, type=str)
        aparser.add_argument("latitude",
                             dest="latitude",
                             default=None,
                             type=str)
        aparser.add_argument("longitude",
                             dest="longitude",
                             default=None,
                             type=str)
        aparser.add_argument("max_matches", default=10, type=int)
        options = aparser.parse_args()

        matches = get_match(last_name=options['last_name'],
                            first_name=options['first_name'],
                            zip_code=options['zip_code'],
                            addr1=options['addr1'],
                            addr2=options['addr2'],
                            city=options['city'],
                            max_num_matches=options['max_matches'])

        # Convert every match record to a plain dict of strings. The field
        # names come from ``dtype.names`` (presumably the matches are NumPy
        # structured arrays -- confirm against get_match).
        matches_dict = dict()
        for mtype, tmatches in matches.items():
            fields = tmatches.dtype.names
            matches_dict[mtype] = [
                {field: str(match[field]) for field in fields}
                for match in tmatches
            ]

        # Add donor information to output (missing arguments become "")
        donor = {
            key: str(val).upper() if val is not None else ""
            for key, val in options.items()
        }
        donor['eligible_address'] = str(
            in_portland(longitude=options['longitude'],
                        latitude=options['latitude']))
        matches_dict['donor_info'] = donor

        # Print JSON output
        return matches_dict

    except Exception:
        # Print the traceback to stdout for diagnostics, then re-raise.
        # Catching BaseException and falling through used to swallow
        # KeyboardInterrupt/SystemExit as well as any error the request
        # parser raises for missing or invalid arguments, and made this
        # method silently return None instead of a dict.
        print(traceback.format_exc())
        raise
# The first two statements (filter `orestar` by `valid_payee`, return it) are
# the tail of a helper whose definition starts above this chunk.
#
# The script-level code that follows has two modes, selected by
# USE_SAVED_FILES:
#   * falsy: fetch the records with get_orestar(); for each record split
#     record['contributor_payee'] on a single space into first and last name,
#     call get_match() with the record's name and address fields, attach the
#     source record to the result under the 'cont' key, and time the lookup.
#     The mean lookup time is printed and both the records and the collected
#     matches are saved under _RESOURCES ('orestar.pickle' and
#     'orestar_matches.pickle').
#   * truthy: print 'USING SAVED DATA' and load both objects back from those
#     same two files.
#
# NOTE(review): unpacking `.split(' ')` into exactly two names raises
# ValueError for any payee that does not contain exactly one space --
# presumably the `valid_payee` filter guarantees that; confirm.
orestar = orestar[valid_payee] return orestar if not USE_SAVED_FILES: orestar = get_orestar() matches = [] runtimes = [] for record in orestar: start_time = time.time() first_name, last_name = record['contributor_payee'].split(' ') local_matches = get_match(first_name=first_name, last_name=last_name, addr1=record['addr_line1'], addr2=record['addr_line2'], city=record['city'], state=record['state'], zip_code=record['zip']) local_matches['cont'] = record matches.append(local_matches) runtimes.append(time.time() - start_time) print(f'Average lookup time: {np.mean(runtimes):0,.3}sec') save(filename=os.path.join(_RESOURCES, 'orestar.pickle'), obj=orestar) save(filename=os.path.join(_RESOURCES, 'orestar_matches.pickle'), obj=matches) else: print('USING SAVED DATA') orestar = load(os.path.join(_RESOURCES, 'orestar.pickle')) matches = load(os.path.join(_RESOURCES, 'orestar_matches.pickle'))