Exemple #1
0
def fill_location_lookup_db(logger):
    """Rebuild the ``map_locations`` table from the hosts stored in the database.

    Reads the hosts via ``host_scraper.read_hosts_from_db()``, discards rows
    whose ``location`` equals ``"-"``, geocodes each distinct remaining
    location and writes the result to ``map_locations``, replacing the table
    if it already exists.

    Args:
        logger: Passed through unchanged to ``geocode_locations``.
    """
    all_hosts = host_scraper.read_hosts_from_db()
    has_location = all_hosts.location != "-"
    unique_locations = all_hosts.loc[has_location]['location'].drop_duplicates()
    geocoded = geocode_locations(unique_locations, logger)

    # The cursor returned alongside the connection is not needed here.
    conn, _cursor = host_scraper.get_db_conn()
    with conn:
        geocoded.to_sql(name='map_locations', con=conn, if_exists="replace")
Exemple #2
0
def locations_table_exists():
    """Report whether a table named ``map_locations`` is present.

    Returns:
        ``True`` when ``sqlite_master`` lists exactly one table with that
        name, ``False`` otherwise.
    """
    conn, cur = host_scraper.get_db_conn()
    query = (
        " SELECT count(name) FROM sqlite_master"
        " WHERE type='table' AND name='map_locations' "
    )
    with conn:
        cur.execute(query)
        # The single result row holds the match count in its first column.
        match_count = cur.fetchone()[0]
    return match_count == 1
Exemple #3
0
    def test_create_db(self, scrape_func):
        """Check the ``hosts`` table produced by ``scraper.fill_new_db()``.

        The table must expose nine columns, among them ``online_notification``
        and ``first_online_timestamp``, and every ``first_online_timestamp``
        read back from the database must be ``None``.

        Args:
            scrape_func: Not used in the body; presumably supplied by a
                decorator or fixture outside this view (TODO confirm).
        """
        scraper.fill_new_db()
        _con, cur = scraper.get_db_conn()
        # Index 1 of each PRAGMA table_info row is taken as the column name.
        column_names = [info[1] for info in cur.execute('PRAGMA table_info(hosts)')]

        assert len(column_names) == 9
        assert "online_notification" in column_names
        assert "first_online_timestamp" in column_names

        stored_hosts = scraper.read_hosts_from_db()
        assert all(
            stamp is None
            for stamp in stored_hosts.first_online_timestamp.values
        )
Exemple #4
0
def cache_new_locations(new_hosts_df, logger):
    """Geocode and cache locations from ``new_hosts_df`` that are not cached yet.

    If the ``map_locations`` table does not exist it is first built with
    ``fill_location_lookup_db``. Afterwards the distinct locations of
    ``new_hosts_df`` (ignoring rows whose ``location`` equals ``"-"``) are
    compared against the ``explorer_location`` column of ``map_locations``;
    only the ones missing there are geocoded and appended to the table.

    Args:
        new_hosts_df: Frame with a ``location`` column describing the hosts
            to consider.
        logger: Passed through to ``fill_location_lookup_db`` and
            ``geocode_locations``.

    Returns:
        None. Returns early, without writing, when there is nothing new.
    """
    if not locations_table_exists():
        # fill_location_lookup_db takes the logger as a required argument;
        # calling it bare raised TypeError whenever the table was missing.
        fill_location_lookup_db(logger)

    # The cursor returned alongside the connection is not needed here.
    conn, _cursor = host_scraper.get_db_conn()
    new_hosts_df = new_hosts_df.loc[new_hosts_df.location != "-"]
    locations = new_hosts_df['location'].drop_duplicates()
    cached_locations = pd.read_sql('select * from map_locations', conn)

    already_cached = locations.isin(cached_locations.explorer_location)
    new_locations = locations[~already_cached]
    if new_locations.empty:
        return

    loc_df = geocode_locations(new_locations, logger)
    with conn:
        loc_df.to_sql(name='map_locations', con=conn, if_exists="append")
Exemple #5
0
def retrieve_cached_locations():
    """Return all rows of the ``map_locations`` table as loaded by ``pd.read_sql``."""
    # The cursor returned alongside the connection is not needed here.
    conn, _cursor = host_scraper.get_db_conn()
    return pd.read_sql('select * from map_locations', conn)