def create_database(self):
    """Rebuild ``weather.sqlite`` from freshly scraped data.

    Scrapes the hard-coded daily-data page, deletes any existing
    ``weather.sqlite`` file, creates the ``weather`` table and inserts one
    row per scraped date with the location fixed to ``"Winnipeg,MB"``.

    The scraper result is consumed as a mapping of sample date to a dict
    with ``"Min"``, ``"Max"`` and ``"Mean"`` entries.
    """
    scraper = WeatherScraper(
        'https://climate.weather.gc.ca/climate_data/daily_data_e.html?'
        'StationID=27174&timeframe=2&StartYear=1999&'
        'EndYear=1999&Day=1&Year=2015&Month=11#')
    weather = scraper.scrape_weather()

    # Start from an empty database file so the table can be created anew.
    if os.path.exists("weather.sqlite"):
        os.remove("weather.sqlite")

    with UseDatabase('weather.sqlite') as cursor:
        execute_str = '''create table weather
                            (id integer primary key autoincrement not null,
                            sample_date text not null,
                            location text not null,
                            min_temp real not null,
                            max_temp real not null,
                            avg_temp real not null,
                            UNIQUE (sample_date));'''

        cursor.execute(execute_str)

        sqlite_insert = """INSERT INTO weather
                                (sample_date,location,min_temp,max_temp,avg_temp)
                                VALUES (?,?,?,?,?);"""

        for sample_date, temps in weather.items():
            # Bind values in the same order as the column list above:
            # min_temp first, then max_temp, then avg_temp.
            cursor.execute(sqlite_insert,
                           (sample_date, "Winnipeg,MB", temps["Min"],
                            temps["Max"], temps["Mean"]))
Ejemplo n.º 2
0
 def clear_db_and_install_all_weather_data(self, event):
     """Run a full scrape, then empty the table and store the scraped data.

     ``event`` is accepted but not used by this handler.
     """
     scraper = WeatherScraper()
     scraper.start_scraping()
     scraped_weather = scraper.weather

     database = DBOperations(self.db_name)
     table = self.table_name
     # Make sure the table exists before clearing and refilling it.
     database.initialize_db(table)
     database.purge_data(table)
     database.save_data(scraped_weather, table)
Ejemplo n.º 3
0
 def renew_all_data(self):
     """
     Scrape the full history, then replace the stored data with it.

     The scrape runs before the purge. Any exception raised along the way
     is logged through ``self.logger`` and not propagated.
     :return: None
     """
     try:
         scraper = WeatherScraper()
         scraper.scrape_now_to_earliest_month_weather()
         self.my_db.purge_data()
         self.my_db.save_data(scraper.weather)
     except Exception as error:
         self.logger.error(error)
Ejemplo n.º 4
0
 def scrape_and_save_weather_data(self, end_year:int = 0, end_month:int = 12):
     """Scrape weather data and replace the table contents with it.

     When ``end_year`` is falsy, both ``end_year`` and ``end_month`` are
     taken from today's date; otherwise the given values are passed to the
     scraper unchanged.
     """
     if not end_year:
         now = date.today()
         end_year, end_month = now.year, now.month

     scraper = WeatherScraper()
     scraper.start_scraping('url', end_year, end_month)
     scraped_weather = scraper.weather

     database = DBOperations(self.db_name)
     table = self.table_name
     database.initialize_db(table)
     database.purge_data(table)
     database.save_data(scraped_weather, table)
Ejemplo n.º 5
0
    def __init__(self):
        """Create the database, scraper and plot helpers and cache the stored date range.

        ``last_updated`` / ``first_updated`` hold the ``sample_date`` of the
        row returned by ``fetch_last`` / ``fetch_first``, or ``""`` when the
        table does not exist. Any exception is logged and swallowed.
        """
        try:
            self.db = DBOperations("weather.sqlite")
            self.ws = WeatherScraper()
            self.pl = PlotOperations()

            if self.db.is_table_exist():
                self.last_updated = self.db.fetch_last()[0]["sample_date"]
            else:
                self.last_updated = ""

            if self.db.is_table_exist():
                self.first_updated = self.db.fetch_first()[0]["sample_date"]
            else:
                self.first_updated = ""

        except Exception as error:
            logging.error(f"weatherprocessor:__init__, {error}")
Ejemplo n.º 6
0
    def update_db(self, event):
        """Scrape starting from the newest stored date and save the result.

        Reads ``max(sample_date)`` from the table, prints it, hands it to the
        scraper and saves whatever the scraper collected. ``event`` is
        accepted but not used.
        """
        scraper = WeatherScraper()
        with DBOperations(self.db_name) as cursor:
            cursor.execute(f"select max(sample_date) from {self.table_name};")
            rows = cursor.fetchall()
            latest_date = rows[0][0]

        print('latest date in db', latest_date)
        scraper.start_scraping(latest_date)
        scraped_weather = scraper.weather

        database = DBOperations(self.db_name)
        database.initialize_db(self.table_name)
        database.save_data(scraped_weather, self.table_name)
Ejemplo n.º 7
0
 def update_db(self):
     """Scrape every URL from ``generate_data_url`` and store the merged result."""
     merged_weather = {}
     for page_url in self.generate_data_url():
         print('Scraping data from: ')
         print(page_url)
         # A fresh parser is created for every page.
         page_parser = WeatherScraper()
         with urllib.request.urlopen(page_url) as response:
             # NOTE: str() on the bytes payload yields its repr text
             # ("b'...'"), not a decoded string.
             page_html = str(response.read())
         page_parser.feed(page_html)
         merged_weather.update(page_parser.temps_data)
     DBOperations().update_database(merged_weather)
Ejemplo n.º 8
0
class TestScraping(unittest.TestCase):
    """Basic type checks for ``WeatherScraper`` and the data it collects."""

    def setUp(self):
        """Give every test its own scraper instance."""
        self.myweather = WeatherScraper()

    def test_scraper_type(self):
        """The fixture object is a ``WeatherScraper``."""
        self.assertIsInstance(self.myweather, WeatherScraper)

    def test_weather_return_type(self):
        """Scraping January 1996 yields a dict whose first value is a dict."""
        year, month = 1996, 1
        url = (f"http://climate.weather.gc.ca/climate_data/daily_data_e.html"
               f"?StationID=27174&timeframe=2&StartYear=1840&EndYear={year}"
               f"&Day=1&Year={year}&Month={month}#")
        self.myweather.start_scraping(url, year)

        weather = self.myweather.weather
        self.assertIs(type(weather), dict)

        first_key = list(weather.keys())[0]
        self.assertIs(type(weather[first_key]), dict)
Ejemplo n.º 9
0
    def fill_missing_data(self):
        """
        Scrape the gap between today and the newest stored row, then save it.

        Months are scraped from the current month backwards down to and
        including the month of the newest stored date, so the partially
        stored month is completed as well. The stored date is read as
        text with the year in characters 0-3 and the month in characters 5-6.

        Any exception is logged through ``self.logger`` and not propagated.
        :return: None
        """
        try:
            last_one_date = self.my_db.fetch_last_one()[0][0]
            last_one_year = int(last_one_date[:4])
            last_one_month = int(last_one_date[5:7])

            # Read the clock once so year and month belong to the same day.
            today = date.today()
            year, month = today.year, today.month
            my_scraper = WeatherScraper()

            # Compare (year, month) as a pair: testing the two fields
            # separately would stop as soon as either one matched.
            while (year, month) >= (last_one_year, last_one_month):
                my_scraper.scrape_month_weather(year, month)
                month -= 1
                if month == 0:
                    year -= 1
                    month = 12

            self.my_db.save_data(my_scraper.weather)
        except Exception as e:
            self.logger.error(e)
    def update_db(self):
        """Fetch the data missing between the newest stored date and today.

        With no stored row, everything is scraped. Otherwise data is scraped
        from the newest stored date up to today, but only when today is
        later than that date. The status text is set while working and is
        reset afterwards, whether or not the update succeeded.

        Any exception is printed and not propagated.
        """
        try:
            try:
                self.db_status_text.set(
                    "Fetching the data and Updating the Database")
                latest_row = self.db_ops.get_latest_row()
                scraper = WeatherScraper()
                if latest_row is None:
                    data = scraper.scrape_all_data()
                    self.db_ops.save_data(data)
                else:
                    latest_db_date = latest_row[1]
                    today = datetime.today().strftime('%Y-%m-%d')
                    # String comparison; assumes the stored date uses the
                    # same '%Y-%m-%d' layout - TODO confirm.
                    if today > latest_db_date:
                        data = scraper.scrape_data(latest_db_date, today)
                        self.db_ops.save_data(data)
            finally:
                # Reset on failure too, so the UI does not stay on
                # "Fetching..." after an error.
                self.db_status_text.set(" ")

        except Exception as e:
            print("ERROR: " + str(e))
Ejemplo n.º 11
0
class WeatherProcessor:
    """
    Manage the user-facing operations: refreshing the stored weather data
    and generating plots from it.

    Every public method logs exceptions through ``logging.error`` instead of
    propagating them.
    """

    # Consecutive failures tolerated while fetching/saving one month before
    # collect_data gives up instead of retrying forever.
    _MAX_FETCH_FAILURES = 3

    def __init__(self):
        """Create the database, scraper and plot helpers and cache the stored date range.

        ``last_updated`` / ``first_updated`` hold the ``sample_date`` of the
        row returned by ``fetch_last`` / ``fetch_first``, or ``""`` when the
        table does not exist.
        """
        try:
            self.db = DBOperations("weather.sqlite")
            self.ws = WeatherScraper()
            self.pl = PlotOperations()
            self.last_updated = self.db.fetch_last(
            )[0]["sample_date"] if self.db.is_table_exist() else ""
            self.first_updated = self.db.fetch_first(
            )[0]["sample_date"] if self.db.is_table_exist() else ""

        except Exception as e:
            logging.error(f"weatherprocessor:__init__, {e}")

    def download_data(self):
        """Purge the database, reinitialize it, then collect data into it."""
        try:
            self.db.purge_data()
            self.db.initialize_db()
            self.collect_data()

        except Exception as e:
            logging.error(f"weatherprocessor:download_data, {e}")

    def update_data(self):
        """Initialize the database, collect data and refresh ``last_updated``."""
        try:
            self.db.initialize_db()
            self.collect_data()
            self.last_updated = self.db.fetch_last()[0]["sample_date"]

        except Exception as e:
            logging.error(f"weatherprocessor:update_data, {e}")

    def get_box_plot(self, start_year, end_year):
        """Fetch data for the given year range and generate a box plot from it.

        The fetch is called with ``int(end_year) + 1`` as its second argument.
        """
        try:
            weather = self.db.fetch_data(start_year, int(end_year) + 1, False)
            self.pl.generate_box_plot(weather, start_year, end_year)

        except Exception as e:
            logging.error(f"weatherprocessor:get_box_plot, {e}")

    def get_line_plot(self, year, month):
        """Fetch data for the given year and month and generate a line plot from it."""
        try:
            weather = self.db.fetch_data(year, month, True)
            self.pl.generate_line_plot(weather, year, month)

        except Exception as e:
            logging.error(f"weatherprocessor:get_line_plot, {e}")

    def collect_data(self):
        """Scrape month by month, newest first, saving each month as it arrives.

        Starting at the current month, each month's page is fetched, parsed
        and saved. Collection stops when either

        * a month's data equals the data of the following month of the same
          year (the duplicate month is not saved), or
        * a day matching the newest ``sample_date`` already in the database
          is reached (only the later days of that month are saved).

        An ``update_latest_download`` message is published after each year.
        A month that keeps failing is retried at most
        ``_MAX_FETCH_FAILURES`` times in a row; after that collection is
        abandoned without publishing a message for the unfinished year.
        """
        try:
            today = date.today()
            year = int(today.strftime("%Y"))
            month = int(today.strftime("%m"))
            duplicate_month, duplicate_day = False, False
            recent_date = ""
            failures = 0

            dates = self.db.fetch_last()
            if len(dates) > 0:
                recent_date = dates[0]["sample_date"]

            # Iterate through each year, latest first, working backwards
            # until duplicate data is found.
            while not duplicate_month and not duplicate_day:
                try:
                    month_dict = dict()

                    # Iterate through each month of the year, latest first,
                    # until duplicate data is found.
                    while not duplicate_day and month > 0:
                        try:
                            url = self.ws.get_url(year, month)

                            with urllib.request.urlopen(url) as response:
                                html = str(response.read())

                            self.ws.feed(html)
                            month_dict[month] = self.ws.return_dict()

                            # Same data as the following month: stop, and
                            # drop the entry that was just added.
                            if month + 1 in month_dict.keys(
                            ) and month_dict[month] == month_dict[month + 1]:
                                month_dict.popitem()
                                duplicate_month = True
                                break

                            if recent_date != "":
                                temp_dict = {}
                                # Walk the month's entries in reverse order,
                                # keeping entries until the newest stored
                                # date is met.
                                for key, value in reversed(
                                        month_dict[month].items()):
                                    try:
                                        check_date = f"{year}-{month:02d}-{key}"
                                        if check_date == recent_date:
                                            duplicate_day = True

                                            break
                                        temp_dict[key] = value

                                    except Exception as e:
                                        logging.error(
                                            f"weatherprocessor:collect_data:loop:loop2:loop3, {e}"
                                        )

                                month_dict[month] = temp_dict
                            self.db.save_data(month_dict[month], month, year)
                            month -= 1
                            failures = 0

                        except Exception as e:
                            logging.error(
                                f"weatherprocessor:collect_data:loop:loop2, {e}"
                            )
                            # The month was not advanced, so the loop retries
                            # it; bound the retries so a persistent failure
                            # cannot spin forever.
                            failures += 1
                            if failures >= self._MAX_FETCH_FAILURES:
                                return

                    # Roll over to the previous year before notifying, so a
                    # failing notification cannot stall the loop.
                    finished_year = year
                    month = 12
                    year -= 1
                    pub.sendMessage('update_latest_download',
                                    year=str(finished_year))

                except Exception as e:
                    logging.error(f"weatherprocessor:collect_data:loop, {e}")

        except Exception as e:
            logging.error(f"weatherprocessor:collect_data, {e}")

    def get_years_for_dropdown(self, min_year):
        """Return the selectable years as a list of strings, ascending.

        The list runs from ``min_year`` (or, when ``min_year`` is ``""``, the
        year of the first stored date) through the year of the last stored
        date. It is empty when the table does not exist. ``None`` is returned
        if an exception was logged.
        """
        try:
            years = []

            if self.db.is_table_exist():
                self.last_updated = self.db.fetch_last()[0]["sample_date"]
                self.first_updated = self.db.fetch_first()[0]["sample_date"]

                if min_year == "":
                    firstyear = int(self.first_updated[:4])
                else:
                    firstyear = int(min_year)

                lastyear = int(self.last_updated[:4])

                years = [str(y) for y in range(firstyear, lastyear + 1)]

            return years

        except Exception as e:
            logging.error(f"weatherprocessor:get_years_for_dropdown, {e}")

    def get_months_for_dropdown(self, year):
        """Return the selectable months for ``year`` as two-character strings.

        When ``year`` is ``""`` the year of the first stored date is used.
        The last two characters of every value returned by ``fetch_months``
        are collected and the list is returned in reverse order. It is empty
        when the table does not exist. ``None`` is returned if an exception
        was logged.
        """
        try:
            months = []

            if self.db.is_table_exist():
                self.first_updated = self.db.fetch_first()[0]["sample_date"]

                if year == "":
                    year = int(self.first_updated[:4])

                data = self.db.fetch_months(year)

                # Each returned item is a mapping; take every value from it.
                for item in data:
                    try:
                        for value in item.values():
                            try:
                                months.append(str(value[-2:]))

                            except Exception as e:
                                logging.error(
                                    f"weatherprocessor:get_months_for_dropdown:loop:loop2, {e}"
                                )

                    except Exception as e:
                        logging.error(
                            f"weatherprocessor:get_months_for_dropdown:loop, {e}"
                        )

            return months[::-1]

        except Exception as e:
            logging.error(f"weatherprocessor:get_months_for_dropdown, {e}")
Ejemplo n.º 12
0
        try:
            print('Purging all the data from the database... ')
            with DBOperations(self.db_name) as cursor:
                sql_purge_data_1 = """DELETE FROM samples;"""
                sql_purge_data_2 = """DELETE FROM sqlite_sequence WHERE name = 'samples';"""
                cursor.execute(sql_purge_data_1)
                cursor.execute(sql_purge_data_2)
        except Exception as e:
            self.logger.error(e)


if __name__ == '__main__':
    # Manual smoke run: scrape, replace the stored data, then print it back.
    mydb = DBOperations('weather.sqlite')
    mydb.initialize_db()

    my_scraper = WeatherScraper()
    my_scraper.scrape_month_weather(2020, 12)
    my_scraper.scrape_now_to_earliest_month_weather(1998, 5)

    mydb.purge_data()
    mydb.save_data(my_scraper.weather)
    for sample_date, readings in my_scraper.weather.items():
        print(sample_date + ': ' + str(readings))

    # Print one year-level query and one month-level query.
    for heading, query_args in (('years data', (1996,)),
                                ('month data', (2020, 12))):
        print(heading)
        for row in mydb.fetch_data(*query_args):
            print(row)
Ejemplo n.º 13
0
 def OnClickedUpdate(self, event):
     """Start a scrape with a new ``WeatherScraper``; ``event`` is unused."""
     WeatherScraper().start_scraping()
Ejemplo n.º 14
0
 def OnClickedDownload(self, event):
     """Purge and reinitialize the database, then start a scrape; ``event`` is unused."""
     database = DBOperations()
     database.purge_data()
     database.initialize_db()
     WeatherScraper().start_scraping()
Ejemplo n.º 15
0
 def main(self):
     """
     Run the interactive text menu until the user chooses to exit.

     Choices:
     1. Scrape a few recent months and pass the result to
        ``DBOperations.create_table``.
     2. Scrape backwards year by year and pass the result to
        ``DBOperations.create_table``.
     3. Ask for a year range ("from to"), query it and show a box plot.
     4. Exit.

     Every branch catches its own exceptions and prints them, so the menu
     keeps running after a failure.
     """
     user_selection = ''
     while user_selection != '4':
         try:
             print("1. Update a set of weather data up to today")
             print("2. Download a full set of weather data")
             print("3. A year range of interest (from year, to year)")
             print("4. Exit")
             user_selection = input("Please make your choice...")
             if user_selection == '1':
                 try:
                     my_scraper = WeatherScraper()
                     now_date = datetime.datetime.now()
                     # Set once the scraper reports is_equal False; stops
                     # both loops.
                     is_loop = False
                     # This range yields the current year only.
                     for i in range(now_date.year, now_date.year - 1, -1):
                         my_scraper.url_year = i
                         if is_loop:
                             break
                         # Covers the two previous months and the current one.
                         # NOTE(review): in January/February this range
                         # starts at -1 or 0 - confirm that is intended.
                         for j in range(now_date.month - 2,
                                        now_date.month + 1):
                             my_scraper.url_month = j
                             # NOTE(review): EndYear is hard-coded to 2018.
                             my_url = f"https://climate.weather.gc.ca/climate_data/daily_data_e.html?%20StationID=27174&timeframe=2&StartYear=1840&EndYear=2018&Day=%201&Year={my_scraper.url_year}&Month={my_scraper.url_month}#"
                             with urllib.request.urlopen(
                                     my_url) as response:
                                 # str() on bytes yields the repr text
                                 # ("b'...'"), not a decoded string.
                                 html = str(response.read())
                             my_scraper.feed(html)
                             # is_equal is maintained by the scraper;
                             # presumably it signals repeated data - verify
                             # against WeatherScraper.
                             if my_scraper.is_equal is False:
                                 is_loop = True
                                 break
                     # print(f"inner{my_scraper.dict_Inner}")
                     # print(f"outer{my_scraper.dict_outer}")
                     my_database = DBOperations()
                     my_database.create_table(my_scraper.dict_outer)
                 except Exception as e:
                     print(
                         "Error in Updating a set of weather data up to today: ",
                         e)
             elif user_selection == '2':
                 try:
                     my_scraper = WeatherScraper()
                     now_date = datetime.datetime.now()
                     is_loop = False
                     # Counts down from the previous year to 0.
                     # NOTE(review): the current year itself is not part of
                     # this range - confirm that is intended.
                     for i in reversed(range(now_date.year)):
                         my_scraper.url_year = i
                         if is_loop:
                             break
                         # NOTE(review): range(0, 13) also requests month 0.
                         for j in range(0, 13):
                             my_scraper.url_month = j
                             # NOTE(review): EndYear is hard-coded to 2018.
                             my_url = f"https://climate.weather.gc.ca/climate_data/daily_data_e.html?%20StationID=27174&timeframe=2&StartYear=1840&EndYear=2018&Day=%201&Year={my_scraper.url_year}&Month={my_scraper.url_month}#"
                             with urllib.request.urlopen(
                                     my_url) as response:
                                 html = str(response.read())
                             my_scraper.feed(html)
                             if my_scraper.is_equal is False:
                                 is_loop = True
                                 break
                     # print(f"inner{my_scraper.dict_Inner}")
                     # print(f"outer{my_scraper.dict_outer}")
                     my_database = DBOperations()
                     my_database.create_table(my_scraper.dict_outer)
                 except Exception as e:
                     print(
                         "Error in downloading a full set of weather data: ",
                         e)
             elif user_selection == '3':
                 try:
                     range_value = input(
                         "Please select a RANGE of your interest(e.g 2017 2019): "
                     )
                     # Split on whitespace; the first two tokens are used as
                     # the start and end of the range.
                     range_value = range_value.split()
                     my_database = DBOperations()
                     dict_value = my_database.query_infos(
                         range_value[0], range_value[1])
                     my_plot_operation = PlotOperations()
                     my_plot_operation.diplay_box_plot(
                         dict_value, range_value[0], range_value[1])
                 except Exception as e:
                     print(
                         "Error in A year range of interest (from year, to year): ",
                         e)
             elif user_selection == '4':
                 break
             else:
                 print("Invalid choice")
         except Exception as e:
             # Last-resort handler for anything the branches did not catch.
             print("Error plot_operations.py: ", e)
Ejemplo n.º 16
0
            mkdir_p(output_dir)
            file_path = '{0}/{1}'.format(output_dir, file_name)

            self.line_plot_path_saving_dict[str(specific_year) + '-' +
                                            str(specific_month)] = file_path
            plt.savefig(file_path)
            plt.show()

            return self.line_plot_path_saving_dict
        except Exception as e:
            self.logger.error(e)


if __name__ == '__main__':
    # Manual smoke run: scrape, replace the stored data, then draw plots.
    mydb = DBOperations('weather.sqlite')
    mydb.initialize_db()

    my_scraper = WeatherScraper()
    # For testing, range is 1996-1997
    my_scraper.scrape_now_to_earliest_month_weather(1998, 5)
    sample_months = ((2018, 5), (2020, 12))
    for sample_year, sample_month in sample_months:
        my_scraper.scrape_month_weather(sample_year, sample_month)

    mydb.purge_data()
    mydb.save_data(my_scraper.weather)

    my_plot = PlotOperations()
    my_plot.generate_box_plot(1996, 1997)
    # One line plot for each month scraped individually above.
    for sample_year, sample_month in sample_months:
        my_plot.generate_line_plot(sample_year, sample_month)
Ejemplo n.º 17
0
        """
        fetch the data base on year in the database.
        """
        with DBOperations(self.name) as dbcm:
            dbcm.execute(
                f"select * from {table_name} where sample_date like '{year}%';"
            )
            fetch_weather = dbcm.fetchall()

        return fetch_weather

    def purge_data(self, table_name: str):
        """
        Delete every row from ``table_name``.

        The table name is interpolated directly into the SQL text.
        """
        delete_sql = f"delete from {table_name} ;"
        with DBOperations(self.name) as cursor:
            cursor.execute(delete_sql)


if __name__ == "__main__":
    # Manual smoke run: scrape, reload the table, then print one year back.
    db_name, table_name = 'weather.sqlite', 'weather'

    myweather = WeatherScraper()
    myweather.start_scraping('url', 1997)
    weather_data_from_weather_scraper = myweather.weather

    db_operations = DBOperations(db_name)
    db_operations.initialize_db(table_name)
    db_operations.purge_data(table_name)
    db_operations.save_data(weather_data_from_weather_scraper, table_name)

    stored_rows = db_operations.fetch_data(table_name, 1996)
    pprint.pprint(stored_rows)
 def setUp(self):
     """Create a new ``WeatherScraper`` and store it on ``self.myweather``."""
     self.myweather = WeatherScraper()
Ejemplo n.º 19
0
        """
        fetch the data base on year in the database.
        """
        with DBOperations(self.name) as dbcm:
            dbcm.execute(
                f"select * from {table_name} where sample_date like '{year}%';"
            )
            fetch_weather = dbcm.fetchall()

        return fetch_weather

    def purge_data(self, table_name: str):
        """
        Delete every row from ``table_name``.

        The table name is interpolated directly into the SQL text.
        """
        delete_sql = f"delete from {table_name} ;"
        with DBOperations(self.name) as cursor:
            cursor.execute(delete_sql)


if __name__ == "__main__":
    # Manual smoke run: scrape, reload the table, then print one year back.
    DB_NAME, TABLE_NAME = 'weather.sqlite', 'weather'

    WEATHER = WeatherScraper()
    WEATHER.start_scraping()
    weather_data_from_weather_scraper = WEATHER.weather

    DB_OPERATIONS = DBOperations(DB_NAME)
    DB_OPERATIONS.initialize_db(TABLE_NAME)
    DB_OPERATIONS.purge_data(TABLE_NAME)
    DB_OPERATIONS.save_data(weather_data_from_weather_scraper, TABLE_NAME)

    stored_rows = DB_OPERATIONS.fetch_data(TABLE_NAME, 1996)
    pprint.pprint(stored_rows)