예제 #1
0
def test_fuzzy_match(example_IRS_service_data,
                     example_IRS_search_object_with_spelled_out_saint,
                     mock_config_object):
    """Verify the duplicate lookup returns the expected stored name.

    Loads the example service data into a scratch collection, refreshes
    its n-grams, and looks up the name/zip taken from the search fixture.
    The lookup is expected to return 'ST FERIOLE ISLAND PARK'.

    Args:
        example_IRS_service_data: fixture with the services to insert.
        example_IRS_search_object_with_spelled_out_saint: fixture mapping
            that supplies the 'name' and 'zip' used for the lookup.
        mock_config_object: fixture that is requested but never referenced
            in the body -- presumably needed for its setup side effects;
            TODO confirm before removing.
    """
    collection = 'pytest_fuzzy_test'
    client = get_mongo_client()

    # Start from a clean slate in case an earlier run left the scratch
    # collection behind.
    if collection in client.list_collection_names():
        client.drop_collection(collection)
    client.create_collection(collection)

    try:
        insert_services(example_IRS_service_data, client, collection)
        refresh_ngrams(client, collection)
        search = example_IRS_search_object_with_spelled_out_saint
        dc = locate_potential_duplicate(search['name'], search['zip'],
                                        client, collection)
    finally:
        # Always remove the scratch collection, even when one of the
        # helpers above raises, so a failing run does not leak test data.
        client.drop_collection(collection)

    assert dc == 'ST FERIOLE ISLAND PARK'
예제 #2
0
# Module-level scraper instance; every setting is passed by keyword.
scraper = SummerMealSitesScraper(
    source=data_source_name,
    # The same URL is passed for both the data and its landing page.
    data_url=data_url,
    data_page_url=data_url,
    data_format="DF",
    extract_usecols=None,
    drop_duplicates_columns=[
        "siteName",
        "siteAddress",
        "siteZip",
        "siteCity",
        "siteState",
    ],
    # Source column name -> replacement column name.
    rename_columns={
        "siteName": "name",
        "siteStatus": "notes",
        "siteAddress": "address1",
        "siteCity": "city",
        "siteState": "state",
        "siteZip": "zip",
        "sitePhone": "phone",
        "Country": "country",
    },
    service_summary="Food Bank",
    check_collection="services",
    dump_collection="tmpSummerMealSites",
    dupe_collection="tmpSummerMealSitesDuplicates",
    data_source_collection_name=data_source_name,
    collection_dupe_field="name",
)

# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    client = get_mongo_client()
    scraper.main_scraper(client)