def test_fuzzy_match(
    example_IRS_service_data,
    example_IRS_search_object_with_spelled_out_saint,
    mock_config_object,
):
    """Check that the fuzzy duplicate lookup returns the expected service name.

    A scratch collection named ``pytest_fuzzy_test`` is (re)created, filled
    via ``insert_services`` and indexed via ``refresh_ngrams``; then
    ``locate_potential_duplicate`` is called with the search fixture's
    ``'name'`` and ``'zip'`` values and must return
    ``'ST FERIOLE ISLAND PARK'``.

    Args:
        example_IRS_service_data: Fixture passed unchanged to
            ``insert_services`` as the data to load.
        example_IRS_search_object_with_spelled_out_saint: Fixture indexed
            with the keys ``'name'`` and ``'zip'`` to build the query.
        mock_config_object: Fixture that is not referenced in the body;
            it is requested by name only (presumably for its setup side
            effects -- confirm against the fixture definition).
    """
    collection_name = 'pytest_fuzzy_test'
    client = get_mongo_client()

    # Start from a clean slate in case an earlier run left the collection behind.
    if collection_name in client.list_collection_names():
        client.drop_collection(collection_name)
    client.create_collection(collection_name)

    try:
        insert_services(example_IRS_service_data, client, collection_name)
        refresh_ngrams(client, collection_name)
        name = example_IRS_search_object_with_spelled_out_saint['name']
        zip_code = example_IRS_search_object_with_spelled_out_saint['zip']
        dc = locate_potential_duplicate(name, zip_code, client, collection_name)
    finally:
        # Always remove the scratch collection, even when one of the steps
        # above raises, so a failing run does not leave test data behind.
        client.drop_collection(collection_name)

    assert dc == 'ST FERIOLE ISLAND PARK'
# Source columns handed to the scraper as ``drop_duplicates_columns``.
_dedupe_columns = [
    'siteName',
    'siteAddress',
    'siteZip',
    'siteCity',
    'siteState',
]

# Source column name -> target field name, handed to the scraper as
# ``rename_columns``.
_column_renames = {
    'siteName': 'name',
    'siteStatus': 'notes',
    'siteAddress': 'address1',
    'siteCity': 'city',
    'siteState': 'state',
    'siteZip': 'zip',
    'sitePhone': 'phone',
    'Country': 'country',
}

# Module-level scraper instance; the same URL is supplied for both the data
# endpoint and the data page.
scraper = SummerMealSitesScraper(
    source=data_source_name,
    data_url=data_url,
    data_page_url=data_url,
    data_format="DF",
    extract_usecols=None,
    drop_duplicates_columns=_dedupe_columns,
    rename_columns=_column_renames,
    service_summary="Food Bank",
    check_collection="services",
    dump_collection="tmpSummerMealSites",
    dupe_collection="tmpSummerMealSitesDuplicates",
    data_source_collection_name=data_source_name,
    collection_dupe_field='name',
)


if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script.
    mongo_client = get_mongo_client()
    scraper.main_scraper(mongo_client)