def fill_location_lookup_db(logger):
    """Build the map_locations lookup table from scratch.

    Reads all hosts from the DB, drops entries with no usable location
    ("-"), geocodes each distinct location string, and writes the result
    to the `map_locations` table, replacing any previous contents.
    """
    all_hosts = host_scraper.read_hosts_from_db()
    # "-" marks hosts without a reported location; they cannot be geocoded.
    usable = all_hosts.loc[all_hosts.location != "-"]
    unique_locations = usable['location'].drop_duplicates()
    geocoded = geocode_locations(unique_locations, logger)
    conn, cur = host_scraper.get_db_conn()
    with conn:
        geocoded.to_sql(name='map_locations', con=conn, if_exists="replace")
def locations_table_exists():
    """Return True if the `map_locations` table exists in the database."""
    conn, cur = host_scraper.get_db_conn()
    with conn:
        # sqlite_master lists every table; count(name) is 1 iff it exists.
        cur.execute(
            '''
            SELECT count(name)
            FROM sqlite_master
            WHERE type='table' AND name='map_locations'
            '''
        )
        row = cur.fetchone()
    return row[0] == 1  # 1 == exists
def test_create_db(self, scrape_func):
    """Creating a fresh DB yields a 9-column hosts table whose
    notification columns exist and whose first_online_timestamp
    values all start out as None."""
    scraper.fill_new_db()
    con, cur = scraper.get_db_conn()
    column_names = [row[1] for row in cur.execute('PRAGMA table_info(hosts)')]
    assert len(column_names) == 9
    assert "online_notification" in column_names
    assert "first_online_timestamp" in column_names
    hosts = scraper.read_hosts_from_db()
    # Freshly created rows must not have been marked online yet.
    for ts in hosts.first_online_timestamp.values:
        assert ts is None
def cache_new_locations(new_hosts_df, logger):
    """Geocode and cache any locations from *new_hosts_df* not yet in
    the `map_locations` table.

    Hosts whose location is "-" (no location reported) are skipped.
    If the lookup table does not exist yet, it is built from the full
    hosts table first. Returns early when every location is already
    cached; otherwise appends the newly geocoded rows.
    """
    if not locations_table_exists():
        # BUG FIX: fill_location_lookup_db requires the logger argument;
        # calling it bare raised TypeError on the cold-cache path.
        fill_location_lookup_db(logger)
    conn, cur = host_scraper.get_db_conn()
    new_hosts_df = new_hosts_df.loc[new_hosts_df.location != "-"]
    locations = new_hosts_df['location'].drop_duplicates()
    cached_locations = pd.read_sql('select * from map_locations', conn)
    # Only geocode locations we have never seen before.
    new_locations = locations[~locations.isin(cached_locations.explorer_location)]
    if new_locations.empty:
        return
    loc_df = geocode_locations(new_locations, logger)
    with conn:
        loc_df.to_sql(name='map_locations', con=conn, if_exists="append")
def retrieve_cached_locations():
    """Return the full `map_locations` table as a DataFrame."""
    conn, cur = host_scraper.get_db_conn()
    return pd.read_sql('select * from map_locations', conn)