def create_database(self):
    """Rebuild ``weather.sqlite`` from a fresh scrape.

    Scrapes the hard-coded station page, deletes any existing
    ``weather.sqlite`` file, recreates the ``weather`` table and inserts
    one row per scraped date with the fixed location ``"Winnipeg,MB"``.
    """
    test = WeatherScraper(
        'https://climate.weather.gc.ca/climate_data/daily_data_e.html?'
        'StationID=27174&timeframe=2&StartYear=1999&'
        'EndYear=1999&Day=1&Year=2015&Month=11#')
    weather = test.scrape_weather()

    # Start from an empty file so the CREATE TABLE below cannot collide
    # with a table left over from an earlier run.
    if os.path.exists("weather.sqlite"):
        os.remove("weather.sqlite")

    with UseDatabase('weather.sqlite') as cursor:
        execute_str = '''create table weather
                        (id integer primary key autoincrement not null,
                        sample_date text not null,
                        location text not null,
                        min_temp real not null,
                        max_temp real not null,
                        avg_temp real not null,
                        UNIQUE (sample_date));'''
        cursor.execute(execute_str)

        sqlite_insert = """INSERT INTO weather
                        (sample_date,location,min_temp,max_temp,avg_temp)
                        VALUES (?,?,?,?,?);"""
        for sample_date, temps in weather.items():
            # Bind values in the same order as the column list above:
            # min, max, mean (they were previously bound as max, min).
            cursor.execute(
                sqlite_insert,
                (sample_date, "Winnipeg,MB",
                 temps["Min"], temps["Max"], temps["Mean"]))
def clear_db_and_install_all_weather_data(self, event):
    """Empty the weather table and refill it from a complete scrape.

    ``event`` is accepted but not used by the body.
    """
    scraper = WeatherScraper()
    scraper.start_scraping()
    scraped = scraper.weather

    store = DBOperations(self.db_name)
    store.initialize_db(self.table_name)
    store.purge_data(self.table_name)
    store.save_data(scraped, self.table_name)
def renew_all_data(self):
    """
    Re-scrape everything and replace the stored data with the result.

    The scrape runs first; the purge and save only follow once it has
    returned. Any exception is logged through ``self.logger`` and swallowed.
    :return:
    """
    try:
        scraper = WeatherScraper()
        scraper.scrape_now_to_earliest_month_weather()
        self.my_db.purge_data()
        self.my_db.save_data(scraper.weather)
    except Exception as error:
        self.logger.error(error)
def scrape_and_save_weather_data(self, end_year: int = 0, end_month: int = 12):
    """Scrape up to the given end year/month and overwrite the table.

    When ``end_year`` is falsy (the default ``0``), both ``end_year`` and
    ``end_month`` are replaced by today's year and month.
    """
    if not end_year:
        now = date.today()
        end_year, end_month = now.year, now.month

    scraper = WeatherScraper()
    scraper.start_scraping('url', end_year, end_month)
    scraped = scraper.weather

    store = DBOperations(self.db_name)
    store.initialize_db(self.table_name)
    store.purge_data(self.table_name)
    store.save_data(scraped, self.table_name)
def __init__(self):
    """
    Create the database, scraper and plot helpers, then record the newest
    and oldest ``sample_date`` in the database (empty strings when the
    table does not exist). Any exception is logged and swallowed.
    """
    try:
        self.db = DBOperations("weather.sqlite")
        self.ws = WeatherScraper()
        self.pl = PlotOperations()

        if self.db.is_table_exist():
            newest_rows = self.db.fetch_last()
            self.last_updated = newest_rows[0]["sample_date"]
        else:
            self.last_updated = ""

        if self.db.is_table_exist():
            oldest_rows = self.db.fetch_first()
            self.first_updated = oldest_rows[0]["sample_date"]
        else:
            self.first_updated = ""
    except Exception as error:
        logging.error(f"weatherprocessor:__init__, {error}")
def update_db(self, event):
    """Scrape starting from the newest stored date and save the result.

    Reads ``max(sample_date)`` from the table, prints it, passes it to the
    scraper and stores whatever the scraper collected. ``event`` is
    accepted but not used by the body.
    """
    scraper = WeatherScraper()

    with DBOperations(self.db_name) as cursor:
        cursor.execute(f"select max(sample_date) from {self.table_name};")
        rows = cursor.fetchall()
        newest = rows[0][0]

    print('latest date in db', newest)
    scraper.start_scraping(newest)
    scraped = scraper.weather

    store = DBOperations(self.db_name)
    store.initialize_db(self.table_name)
    store.save_data(scraped, self.table_name)
def update_db(self):
    """Scrape every URL from ``generate_data_url`` and store the results.

    Each page is parsed by its own ``WeatherScraper``; the per-page
    ``temps_data`` dicts are merged and handed to
    ``DBOperations.update_database`` in one call.
    """
    page_urls = self.generate_data_url()
    collected = {}
    for page_url in page_urls:
        print('Scraping data from: ')
        print(page_url)
        parser = WeatherScraper()
        with urllib.request.urlopen(page_url) as response:
            raw = response.read()
        # NOTE(review): str() on bytes yields the "b'...'" repr rather than
        # decoded text; kept because the parser may depend on it - confirm.
        parser.feed(str(raw))
        collected.update(parser.temps_data)

    DBOperations().update_database(collected)
class TestScraping(unittest.TestCase):
    """Smoke tests for ``WeatherScraper``."""

    def setUp(self):
        """Create a fresh scraper for each test."""
        self.myweather = WeatherScraper()

    def test_scraper_type(self):
        """The fixture is a ``WeatherScraper`` instance."""
        self.assertIsInstance(self.myweather, WeatherScraper)

    def test_weather_return_type(self):
        """Scraping one month yields a non-empty dict of per-day dicts."""
        year = 1996
        month = 1
        url = ("http://climate.weather.gc.ca/"
               "climate_data/daily_data_e.html"
               "?StationID=27174"
               "&timeframe=2&StartYear=1840"
               f"&EndYear={year}"
               f"&Day=1&Year={year}"
               f"&Month={month}#")
        self.myweather.start_scraping(url, year)

        weather = self.myweather.weather
        # assertIsInstance matches test_scraper_type and reports the
        # actual type on failure.
        self.assertIsInstance(weather, dict)
        # Fail with a readable message instead of an IndexError when the
        # scrape produced nothing.
        self.assertTrue(weather, "scraper returned no weather data")
        daily = next(iter(weather.values()))
        self.assertIsInstance(daily, dict)
def fill_missing_data(self):
    """
    Scrape every month from the current one back to the month of the
    newest stored row (inclusive) and save the result.

    The month of the newest row is re-scraped as well so that days added
    to it since the last run are picked up. Nothing is scraped when the
    newest stored date lies after the current month. Any exception is
    logged through ``self.logger`` and swallowed.
    :return:
    """
    try:
        last_one_date = self.my_db.fetch_last_one()[0][0]
        # Slices assume a "YYYY-MM-..." layout for the stored date.
        last_one_year = int(last_one_date[:4])
        last_one_month = int(last_one_date[5:7])

        # Read the clock once so year and month belong to the same day.
        today = date.today()
        year, month = today.year, today.month

        my_scraper = WeatherScraper()
        # Tuple comparison orders by year first, then month. The previous
        # condition (`year != ... and month != ...`) stopped as soon as
        # either field matched, e.g. 2019-05 -> 2020-05 scraped nothing.
        while (year, month) >= (last_one_year, last_one_month):
            my_scraper.scrape_month_weather(year, month)
            month -= 1
            if month == 0:
                year -= 1
                month = 12

        self.my_db.save_data(my_scraper.weather)
    except Exception as e:
        self.logger.error(e)
def update_db(self):
    """Bring the database up to today's date.

    With no latest row, everything is scraped and saved. Otherwise only
    the span from the latest stored date to today is scraped, and only if
    today is later than that date. The status text is set while working
    and is always reset afterwards, including when an error occurred.
    Exceptions are printed and swallowed.
    """
    try:
        self.db_status_text.set(
            "Fetching the data and Updating the Database")
        latest_row = self.db_ops.get_latest_row()
        scraper = WeatherScraper()
        if latest_row is None:
            self.db_ops.save_data(scraper.scrape_all_data())
        else:
            latest_db_date = latest_row[1]
            today = datetime.today().strftime('%Y-%m-%d')
            # String comparison; assumes the stored date uses the same
            # zero-padded YYYY-MM-DD layout - TODO confirm.
            if today > latest_db_date:
                data = scraper.scrape_data(latest_db_date, today)
                self.db_ops.save_data(data)
    except Exception as e:
        print("ERROR: " + str(e))
    finally:
        # Previously skipped on failure, leaving the "Fetching..." message
        # on screen indefinitely.
        self.db_status_text.set(" ")
class WeatherProcessor:
    """
    This class manages the user interaction to generate plots and update
    the data.

    Every method logs and swallows its own exceptions, so failures are
    reported through ``logging`` rather than raised to the caller.
    """

    # Consecutive failures tolerated for a single month before
    # collect_data() gives up instead of retrying that month forever.
    MAX_MONTH_RETRIES = 3

    def __init__(self):
        """
        Initialize classes and variables for db operations, web scraping,
        plot operations and UI configuration.
        """
        # Defined up front so the attributes exist even if setup fails.
        self.last_updated = ""
        self.first_updated = ""
        try:
            self.db = DBOperations("weather.sqlite")
            self.ws = WeatherScraper()
            self.pl = PlotOperations()
            if self.db.is_table_exist():
                self.last_updated = self.db.fetch_last()[0]["sample_date"]
                self.first_updated = self.db.fetch_first()[0]["sample_date"]
        except Exception as e:
            logging.error(f"weatherprocessor:__init__, {e}")

    def download_data(self):
        """
        Clears the database, reinitializes it, then downloads all the data
        to it.
        """
        try:
            self.db.purge_data()
            self.db.initialize_db()
            self.collect_data()
        except Exception as e:
            logging.error(f"weatherprocessor:download_data, {e}")

    def update_data(self):
        """
        Ensures the database exists, collects data back to the most recent
        date already stored, then refreshes ``last_updated``.
        """
        try:
            self.db.initialize_db()
            self.collect_data()
            self.last_updated = self.db.fetch_last()[0]["sample_date"]
        except Exception as e:
            logging.error(f"weatherprocessor:update_data, {e}")

    def get_box_plot(self, start_year, end_year):
        """
        Fetches data within the user's range and generates a box plot for
        the mean temperatures of each month.

        ``end_year`` is converted with ``int`` and incremented by one
        before being passed to ``fetch_data``.
        """
        try:
            weather = self.db.fetch_data(start_year, int(end_year) + 1, False)
            self.pl.generate_box_plot(weather, start_year, end_year)
        except Exception as e:
            logging.error(f"weatherprocessor:get_box_plot, {e}")

    def get_line_plot(self, year, month):
        """
        Fetches the data for the given year and month and generates a line
        plot of the daily mean temperatures of that month.
        """
        try:
            weather = self.db.fetch_data(year, month, True)
            self.pl.generate_line_plot(weather, year, month)
        except Exception as e:
            logging.error(f"weatherprocessor:get_line_plot, {e}")

    def _fetch_month(self, year, month):
        """Download and parse one month's page; return the scraper's dict."""
        url = self.ws.get_url(year, month)
        with urllib.request.urlopen(url) as response:
            # NOTE(review): str() on bytes yields the "b'...'" repr rather
            # than decoded text; kept because the parser may depend on it.
            html = str(response.read())
        self.ws.feed(html)
        return self.ws.return_dict()

    def collect_data(self):
        """
        Scrape month by month, newest first, saving each month as it goes.

        Starts at the current month and walks backwards. Collection stops
        when a month's data equals the data of the following month in the
        same year (duplicate month), or when a day matching the newest
        ``sample_date`` already stored is reached (duplicate day). A
        message is published on ``update_latest_download`` after each
        year.

        A month that fails ``MAX_MONTH_RETRIES`` times in a row aborts the
        collection; previously such a month was retried without limit.
        """
        try:
            today = date.today()
            year, month = today.year, today.month
            duplicate_month, duplicate_day = False, False
            recent_date = ""
            dates = self.db.fetch_last()
            if len(dates) > 0:
                recent_date = dates[0]["sample_date"]

            failures = 0
            # One pass of the outer loop per year, newest first.
            while not duplicate_month and not duplicate_day:
                month_dict = {}
                # One pass of the inner loop per month, newest first.
                while not duplicate_day and month > 0:
                    try:
                        month_dict[month] = self._fetch_month(year, month)
                        if (month + 1 in month_dict
                                and month_dict[month] == month_dict[month + 1]):
                            # Same data as the following month: treated as
                            # the end of the available history.
                            month_dict.popitem()
                            duplicate_month = True
                            break
                        if recent_date != "":
                            # Keep only days not yet stored: walk the month
                            # in reverse until the newest stored date.
                            temp_dict = {}
                            for key, value in reversed(
                                    month_dict[month].items()):
                                check_date = f"{year}-{month:02d}-{key}"
                                if check_date == recent_date:
                                    duplicate_day = True
                                    break
                                temp_dict[key] = value
                            month_dict[month] = temp_dict
                        self.db.save_data(month_dict[month], month, year)
                        month -= 1
                        failures = 0
                    except Exception as e:
                        logging.error(
                            f"weatherprocessor:collect_data:loop:loop2, {e}"
                        )
                        # The month counter did not advance, so the same
                        # month is retried; cap the retries to avoid an
                        # endless loop on a persistent failure.
                        failures += 1
                        if failures >= self.MAX_MONTH_RETRIES:
                            logging.error(
                                "weatherprocessor:collect_data, giving up on "
                                f"{year}-{month:02d} after {failures} "
                                "consecutive failures"
                            )
                            return
                try:
                    pub.sendMessage('update_latest_download', year=str(year))
                except Exception as e:
                    logging.error(f"weatherprocessor:collect_data:loop, {e}")
                # Always step to the previous year, even when publishing
                # failed, so the outer loop keeps making progress.
                month = 12
                year -= 1
        except Exception as e:
            logging.error(f"weatherprocessor:collect_data, {e}")

    def get_years_for_dropdown(self, min_year):
        """
        Retrieves the years for the combo boxes based on a given min_year.

        Returns the years as strings from ``min_year`` (or, when it is
        ``""``, the year of the oldest stored date) up to the year of the
        newest stored date. Returns an empty list when the table does not
        exist and ``None`` when an error was logged.
        """
        try:
            years = []
            if self.db.is_table_exist():
                self.last_updated = self.db.fetch_last()[0]["sample_date"]
                self.first_updated = self.db.fetch_first()[0]["sample_date"]
                if min_year == "":
                    firstyear = int(self.first_updated[:4])
                else:
                    firstyear = int(min_year)
                lastyear = int(self.last_updated[:4])
                years = [str(y) for y in range(firstyear, lastyear + 1)]
            return years
        except Exception as e:
            logging.error(f"weatherprocessor:get_years_for_dropdown, {e}")
            return None

    def get_months_for_dropdown(self, year):
        """
        Retrieves the months for the month combo box based on the selected
        year.

        When ``year`` is ``""`` the year of the oldest stored date is used.
        Each value returned by ``fetch_months`` contributes its last two
        characters; the list is returned in reverse order. Returns an
        empty list when the table does not exist and ``None`` when an
        error was logged.
        """
        try:
            months = []
            if self.db.is_table_exist():
                self.first_updated = self.db.fetch_first()[0]["sample_date"]
                if year == "":
                    year = int(self.first_updated[:4])
                data = self.db.fetch_months(year)
                for item in data:
                    try:
                        for value in item.values():
                            # A bad value is logged and skipped so the
                            # remaining months are still returned.
                            try:
                                months.append(str(value[-2:]))
                            except Exception as e:
                                logging.error(
                                    f"weatherprocessor:get_months_for_dropdown:loop:loop2, {e}"
                                )
                    except Exception as e:
                        logging.error(
                            f"weatherprocessor:get_months_for_dropdown:loop, {e}"
                        )
            return months[::-1]
        except Exception as e:
            logging.error(f"weatherprocessor:get_months_for_dropdown, {e}")
            return None
try: print('Purging all the data from the database... ') with DBOperations(self.db_name) as cursor: sql_purge_data_1 = """DELETE FROM samples;""" sql_purge_data_2 = """DELETE FROM sqlite_sequence WHERE name = 'samples';""" cursor.execute(sql_purge_data_1) cursor.execute(sql_purge_data_2) except Exception as e: self.logger.error(e)
if __name__ == '__main__':
    # Manual smoke run: rebuild weather.sqlite from a scrape, then print
    # what was scraped and what the database returns for two queries.
    mydb = DBOperations('weather.sqlite')
    mydb.initialize_db()
    my_scraper = WeatherScraper()
    my_scraper.scrape_month_weather(2020, 12)
    my_scraper.scrape_now_to_earliest_month_weather(1998, 5)
    # Replace whatever the table held with the freshly scraped data.
    mydb.purge_data()
    mydb.save_data(my_scraper.weather)
    for key, value in my_scraper.weather.items():
        print(key + ': ' + str(value))
    print('years data')
    for item in mydb.fetch_data(1996):
        print(item)
    print('month data')
    for item in mydb.fetch_data(2020, 12):
        print(item)
def OnClickedUpdate(self, event):
    """Run a scrape with a new ``WeatherScraper``; ``event`` is unused."""
    WeatherScraper().start_scraping()
def OnClickedDownload(self, event):
    """Purge and re-initialise the database, then run a scrape.

    ``event`` is accepted but not used by the body.
    """
    database = DBOperations()
    database.purge_data()
    database.initialize_db()

    WeatherScraper().start_scraping()
def main(self):
    """
    Run the interactive console menu until the user enters '4'.

    Menu choices:
      1 - scrape a short window of months ending at the current month and
          store the result;
      2 - scrape backwards year by year and store the result;
      3 - ask for a year range and display a box plot for it;
      4 - exit.

    Each choice has its own try/except that prints the error and returns
    to the menu.
    """
    user_selection = ''
    while user_selection != '4':
        try:
            print("1. Update a set of weather data up to today")
            print("2. Download a full set of weather data")
            print("3. A year range of interest (from year, to year)")
            print("4. Exit")
            user_selection = input("Please make your choice...")
            if user_selection == '1':
                try:
                    my_scraper = WeatherScraper()
                    now_date = datetime.datetime.now()
                    # Set once the scraper reports is_equal False; used to
                    # leave the outer year loop as well.
                    is_loop = False
                    # This range yields the current year only.
                    for i in range(now_date.year, now_date.year - 1, -1):
                        my_scraper.url_year = i
                        if is_loop:
                            break
                        # NOTE(review): in January/February this range
                        # starts at -1/0, which are not calendar months -
                        # confirm the scraper tolerates that.
                        for j in range(now_date.month - 2, now_date.month + 1):
                            my_scraper.url_month = j
                            my_url = f"https://climate.weather.gc.ca/climate_data/daily_data_e.html?%20StationID=27174&timeframe=2&StartYear=1840&EndYear=2018&Day=%201&Year={my_scraper.url_year}&Month={my_scraper.url_month}#"
                            with urllib.request.urlopen(
                                    my_url) as response:
                                # NOTE(review): str() on bytes yields the
                                # "b'...'" repr, not decoded text.
                                html = str(response.read())
                            my_scraper.feed(html)
                            if my_scraper.is_equal is False:
                                is_loop = True
                                break
                    my_database = DBOperations()
                    my_database.create_table(my_scraper.dict_outer)
                except Exception as e:
                    print(
                        "Error in Updating a set of weather data up to today: ",
                        e)
            elif user_selection == '2':
                try:
                    my_scraper = WeatherScraper()
                    now_date = datetime.datetime.now()
                    is_loop = False
                    # NOTE(review): reversed(range(year)) runs from
                    # year - 1 down to 0, so the current year itself is
                    # never requested - confirm this is intended.
                    for i in reversed(range(now_date.year)):
                        my_scraper.url_year = i
                        if is_loop:
                            break
                        # NOTE(review): range(0, 13) includes month 0 -
                        # confirm the scraper/site handle it.
                        for j in range(0, 13):
                            my_scraper.url_month = j
                            my_url = f"https://climate.weather.gc.ca/climate_data/daily_data_e.html?%20StationID=27174&timeframe=2&StartYear=1840&EndYear=2018&Day=%201&Year={my_scraper.url_year}&Month={my_scraper.url_month}#"
                            with urllib.request.urlopen(
                                    my_url) as response:
                                html = str(response.read())
                            my_scraper.feed(html)
                            if my_scraper.is_equal is False:
                                is_loop = True
                                break
                    my_database = DBOperations()
                    my_database.create_table(my_scraper.dict_outer)
                except Exception as e:
                    print(
                        "Error in downloading a full set of weather data: ",
                        e)
            elif user_selection == '3':
                try:
                    range_value = input(
                        "Please select a RANGE of your interest(e.g 2017 2019): "
                    )
                    # Whitespace-separated: first item is the start year,
                    # second the end year (both stay strings).
                    range_value = range_value.split()
                    my_database = DBOperations()
                    dict_value = my_database.query_infos(
                        range_value[0], range_value[1])
                    my_plot_operation = PlotOperations()
                    my_plot_operation.diplay_box_plot(
                        dict_value, range_value[0], range_value[1])
                except Exception as e:
                    print(
                        "Error in A year range of interest (from year, to year): ",
                        e)
            elif user_selection == '4':
                break
            else:
                print("Invalid choice")
        except Exception as e:
            print("Error plot_operations.py: ", e)
mkdir_p(output_dir) file_path = '{0}/{1}'.format(output_dir, file_name) self.line_plot_path_saving_dict[str(specific_year) + '-' + str(specific_month)] = file_path plt.savefig(file_path) plt.show() return self.line_plot_path_saving_dict except Exception as e: self.logger.error(e)
if __name__ == '__main__':
    # Manual smoke run: rebuild weather.sqlite from a scrape, then draw one
    # box plot and two line plots.
    mydb = DBOperations('weather.sqlite')
    mydb.initialize_db()
    my_scraper = WeatherScraper()
    # NOTE(review): the trailing comment says 1996-1997 but the arguments
    # are (1998, 5) - confirm what the two arguments mean.
    my_scraper.scrape_now_to_earliest_month_weather(
        1998, 5)  # For testing, range is 1996-1997
    my_scraper.scrape_month_weather(2018, 5)
    my_scraper.scrape_month_weather(2020, 12)
    # Replace whatever the table held with the freshly scraped data.
    mydb.purge_data()
    mydb.save_data(my_scraper.weather)
    my_plot = PlotOperations()
    my_plot.generate_box_plot(1996, 1997)
    my_plot.generate_line_plot(2018, 5)
    my_plot.generate_line_plot(2020, 12)
""" fetch the data base on year in the database. """ with DBOperations(self.name) as dbcm: dbcm.execute( f"select * from {table_name} where sample_date like '{year}%';" ) fetch_weather = dbcm.fetchall() return fetch_weather
    def purge_data(self, table_name: str) -> None:
        """
        Delete every row from ``table_name``.

        The table name is interpolated directly into the SQL text, so it
        must come from trusted code rather than user input.
        """
        with DBOperations(self.name) as dbcm:
            dbcm.execute(f"delete from {table_name} ;")
if __name__ == "__main__":
    # Manual smoke run: scrape, overwrite the 'weather' table, then print
    # the rows stored for 1996.
    myweather = WeatherScraper()
    myweather.start_scraping('url', 1997)
    weather_data_from_weather_scraper = myweather.weather
    db_name = 'weather.sqlite'
    table_name = 'weather'
    db_operations = DBOperations(db_name)
    db_operations.initialize_db(table_name)
    # Clear existing rows before saving the new scrape.
    db_operations.purge_data(table_name)
    db_operations.save_data(weather_data_from_weather_scraper, table_name)
    pprint.pprint(db_operations.fetch_data(table_name, 1996))
def setUp(self) -> None:
    """Give each test its own ``WeatherScraper`` as ``self.myweather``."""
    scraper = WeatherScraper()
    self.myweather = scraper
""" fetch the data base on year in the database. """ with DBOperations(self.name) as dbcm: dbcm.execute( f"select * from {table_name} where sample_date like '{year}%';" ) fetch_weather = dbcm.fetchall() return fetch_weather
    def purge_data(self, table_name: str) -> None:
        """
        Delete every row from ``table_name``.

        The table name is interpolated directly into the SQL text, so it
        must come from trusted code rather than user input.
        """
        with DBOperations(self.name) as dbcm:
            dbcm.execute(f"delete from {table_name} ;")
if __name__ == "__main__":
    # Manual smoke run: scrape with the scraper's defaults, overwrite the
    # 'weather' table, then print the rows stored for 1996.
    WEATHER = WeatherScraper()
    WEATHER.start_scraping()
    weather_data_from_weather_scraper = WEATHER.weather
    DB_NAME = 'weather.sqlite'
    TABLE_NAME = 'weather'
    DB_OPERATIONS = DBOperations(DB_NAME)
    DB_OPERATIONS.initialize_db(TABLE_NAME)
    # Clear existing rows before saving the new scrape.
    DB_OPERATIONS.purge_data(TABLE_NAME)
    DB_OPERATIONS.save_data(weather_data_from_weather_scraper, TABLE_NAME)
    pprint.pprint(DB_OPERATIONS.fetch_data(TABLE_NAME, 1996))