def weather(start_date: str, end_date: str, zip_codes: list, frequency: int,
            location_label=False, export_csv=True, store_df=True,
            api_key='62c4f496efb147c1b2160953202406') -> pd.DataFrame:
    """Returns a pandas DataFrame that contains: location, date, average
    temperature, humidity, wind speed, air pressure, and UV index.

    Args:
        start_date (str): start date, inclusive; e.g. '01-Jan-2020'
        end_date (str): end date, inclusive; e.g. '02-Jan-2020'
        zip_codes (list): list of zip codes
        frequency (int): frequency in hours, usually 24
        location_label (bool, optional): if True, prefix each column name
            with the location. Defaults to False.
        export_csv (bool, optional): keep as True; the data is read back
            from the exported CSV files. Defaults to True.
        store_df (bool, optional): keep as True. Defaults to True.
        api_key (str, optional): obtained from the World Weather Online
            website; valid for 90 days.

    Returns:
        pd.DataFrame: the columns listed above.
    """
    retrieve_hist_data(api_key, zip_codes, start_date, end_date, frequency,
                       location_label=location_label, export_csv=True,
                       store_df=store_df)
    frames = []
    for code in zip_codes:
        frames.append(pd.read_csv(code + '.csv', header=0))
        os.remove(code + '.csv')
        print(f'deleted file {code}.csv')
    # Choose which columns to return.
    return pd.concat(frames)[['location', 'date_time', 'tempC', 'humidity',
                              'windspeedKmph', 'pressure', 'uvIndex']]
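# A hedged usage sketch for weather() above; the zip code and dates are
# illustrative, and a valid World Weather Online API key is assumed.
if __name__ == '__main__':
    df = weather('01-Jan-2020', '02-Jan-2020', ['10001'], 24)
    print(df.head())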
def get_weather_data(config):
    """CSV will be written to the current directory."""
    retrieve_hist_data(config['api_key'], config['location_list'],
                       config['start_date'], config['end_date'],
                       config['frequency'],
                       location_label=config['location_label'],
                       export_csv=True, store_df=False)
    return None
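# A hedged sketch of the config dict that get_weather_data() expects; every
# key is read inside the function above, but the values here are illustrative.
example_config = {
    'api_key': '<your-wwo-api-key>',
    'location_list': ['10001'],
    'start_date': '01-JAN-2020',
    'end_date': '02-JAN-2020',
    'frequency': 24,
    'location_label': False,
}
# get_weather_data(example_config)  # writes 10001.csv to the current directory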
def get_weather():
    frequency = 3
    datex = datetime.datetime.now()
    # Roughly two months of history up to today (a timedelta avoids negative
    # month numbers in January and February).
    startx = datex - datetime.timedelta(days=60)
    start_date = f'{startx.month}-{startx.day}-{startx.year}'
    end_date = f'{datex.month}-{datex.day}-{datex.year}'
    api_key = '06909010f11242eaae354051202907'
    location = request.get_json()
    location_list = [location["location"]]
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)
    data = pd.read_csv(str(location_list[0]) + ".csv")
    maxTempValue = round(data["maxtempC"].mean(axis=0), 2)
    minTempValue = round(data["mintempC"].mean(axis=0), 2)
    feelsLikeValue = round(data["FeelsLikeC"].mean(axis=0), 2)
    humidityValue = round(data["humidity"].mean(axis=0), 2)
    rainfallValue = round(data["precipMM"].mean(axis=0), 2)
    tempValue = round(data["tempC"].mean(axis=0), 2)
    return f"{humidityValue} {tempValue} {rainfallValue}"
def prp():
    date = request.args.get('date_string')
    zipcode = request.args.get('zipcode_str')
    os.chdir("/var/www/FlaskApp/FlaskApp")
    frequency = 24
    start_date = '01-JAN-2010'
    end_date = '01-JAN-2021'
    api_key = '0909c9292f294476aba41920211701'
    location_list = ['97603']
    zipcode = "97603"  # hardcoded; overrides the query-string value
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)
    if os.path.isfile(f"/var/www/FlaskApp/FlaskApp/{zipcode}.csv"):
        data3 = "PRP API call Success"
        prplist = plantrec.prpmain(date)
    else:
        data3 = "PRP API call Failure"
        prplist = []  # avoid a NameError below when the CSV is missing
    temp = {"date": date, "zip": zipcode, "prp": prplist}
    y = json.dumps(temp)
    return y
def call_api(self):
    hist_weather_data = retrieve_hist_data(
        api_key=self.api_key,
        location_list=self.location_list,
        start_date=self.start_date,
        end_date=self.end_date,
        frequency=self.frequency,
        location_label=False,
        export_csv=True,
        store_df=True)
def get_wwo_weather(lat_lon_list, start_date, end_date, frequency, key):
    hist_weather_data = retrieve_hist_data(key, lat_lon_list, start_date,
                                           end_date, frequency=frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)
    data = hist_weather_data[0].drop(
        ['sunrise', 'sunset', 'moonrise', 'moonset', 'moon_illumination',
         'FeelsLikeC', 'HeatIndexC', 'WindChillC'], axis=1)
    # Wind features derived from the speed alone; conventional u/v components
    # would combine speed with wind direction.
    speed = data['windspeedKmph'].to_numpy().astype(np.float32)
    data['wind_u'] = np.cos(speed)
    data['wind_v'] = np.sin(speed)
    return data
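# Hedged usage sketch: WWO accepts "lat,lon" location strings; the
# coordinates, dates, and key below are illustrative.
# df = get_wwo_weather(['45.50,-73.57'], '01-JAN-2019', '31-JAN-2019', 24, '<key>')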
def get_clima():
    os.chdir("../Data")
    frequency = 1
    start_date = '01-JAN-2014'
    end_date = '30-AUG-2019'
    api_key = '28f7f02aa28d4afe9dc215223190509'
    location_list = ['mexico_city']
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)
    # The CSV is exported to the current directory, which is already ../Data.
    clima = pd.read_csv('mexico_city.csv')
    clima.drop(columns=['maxtempC', 'mintempC', 'totalSnow_cm', 'sunHour',
                        'uvIndex.1', 'moonrise', 'moonset', 'sunrise',
                        'sunset', 'HeatIndexC', 'WindChillC', 'WindGustKmph'],
               inplace=True)
    # Spanish column names: date, UV index, moon illumination, dew point,
    # feels-like temperature, cloud cover, humidity, precipitation, pressure,
    # temperature, visibility, wind direction, wind speed.
    clima.columns = ['fecha_hechos', 'uv', 'ilu_luna', 'punto_rocio',
                     'temp_sentir', 'nubosidad', 'humedad', 'precipitacion',
                     'presion', 'temperatura', 'visibilidad', 'dir_viento',
                     'vel_viento']
    return clima
def weather(start_date: str, end_date: str, zip_codes: list, frequency: int,
            api_key='f098932734bd498196c175043201708') -> None:
    """Writes one CSV per zip code to the meteorological data directory,
    covering the requested date range.

    Args:
        start_date (str): start date, inclusive; e.g. '01-Jan-2020'
        end_date (str): end date, inclusive; e.g. '02-Jan-2020'
        zip_codes (list): list of zip codes
        frequency (int): frequency in hours, usually 24
        api_key (str, optional): obtained from the World Weather Online
            website. Defaults to 'f098932734bd498196c175043201708'.
    """
    retrieve_hist_data(api_key, zip_codes, start_date, end_date, frequency,
                       location_label=False, export_csv=True, store_df=True)
    for code in zip_codes:
        data = pd.read_csv(code + '.csv', header=0)
        out_path = os.path.join('Data Collection', 'Data', 'Meteorological',
                                code + '.csv')
        data[['location', 'date_time', 'tempC', 'humidity', 'windspeedKmph',
              'pressure', 'uvIndex']].to_csv(out_path, index=False)
        os.remove(code + '.csv')
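# Hedged usage sketch: dates and zip codes are illustrative, and the
# 'Data Collection/Data/Meteorological' directory must already exist.
if __name__ == '__main__':
    weather('01-Jan-2020', '31-Dec-2020', ['10001', '10002'], 24)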
def get_data(place):
    location_list = [place]
    files = os.listdir(DATA_URL)
    file = place + '.csv'
    # Only fetch if we don't already have a CSV for this place.
    if file not in files:
        hist_weather_data = retrieve_hist_data(api_key, location_list,
                                               start_date, end_date, frequency,
                                               location_label=False,
                                               export_csv=True, store_df=True)
def load_temp(name, end_date, key, start_date="6-DEC-2009", frequency=24):
    api_key = key
    location_list = [name]
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)
    # Returns the DataFrame with a clean integer index.
    trj = hist_weather_data[0].reset_index(drop=True)
    return trj
def get_initial_data_wwo(locations, start_date, end_date):
    frequency = 24
    api_key_1 = '1d5cc937de61480aaf903645202201'
    api_key_2 = '340dfd23ecc94a4a8c631942202301'
    api_key_3 = 'e9c547b9ae304c07b9834623202301'
    # Try each key in turn in case one has expired or hit its request limit.
    for api_key in (api_key_1, api_key_2, api_key_3):
        try:
            hist_weather_data = retrieve_hist_data(api_key, locations,
                                                   start_date, end_date,
                                                   frequency,
                                                   location_label=False,
                                                   export_csv=True,
                                                   store_df=True)
            return hist_weather_data
        except HTTPError:
            print('bad api key, trying another one')
def collecting_data():
    today_date = date.today()
    yesterday_date = today_date - timedelta(1)
    os.chdir(r"theweatherforecast")
    frequency = 3
    # Use one consistent dd-MMM-YYYY format for both dates, as wwo_hist expects.
    start_date = '01-JAN-2015'
    end_date = yesterday_date.strftime('%d-%b-%Y').upper()
    api_key = '74a87fc92c714e70aa1112002210101'
    location_list = ['Rawalpindi']
    _ = retrieve_hist_data(api_key, location_list, start_date, end_date,
                           frequency, location_label=False, export_csv=True,
                           store_df=True)
def get_weather_data(counties, start_date, end_date, frequency, output_dir):
    '''
    counties:   (list) of counties, example: ['nyc', 'queens']
    start_date: (str) date, example: '01-JAN-2018'
    end_date:   (str) date, example: '31-DEC-2018'
    frequency:  (int) frequency of historical data, example: 1 for hourly data
    output_dir: (str) path to directory where you want to store the weather data
    '''
    os.chdir(output_dir)
    api_key = '<your-api-key>'  # enter your api key for wwo
    for county in counties:
        print(f"Getting weather data for {county}")
        location_list = [county]
        hist_weather_data = retrieve_hist_data(api_key, location_list,
                                               start_date, end_date, frequency,
                                               location_label=False,
                                               export_csv=True, store_df=True)
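# Hedged usage sketch matching the docstring above; the output directory is
# illustrative and must exist, and a real WWO key must replace the placeholder.
# get_weather_data(['nyc', 'queens'], '01-JAN-2018', '31-DEC-2018', 1, './weather_data')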
def fetch(self) -> DataFrame:
    try:
        print("Generating history data for {} from {} to {}, Retry: {}".format(
            self.__location_list, self.__start_date, self.__end_date,
            self.__retry))
        data = retrieve_hist_data(self.__api_key, self.__location_list,
                                  self.__start_date, self.__end_date,
                                  self.__freq, location_label=False,
                                  export_csv=False, store_df=True)
        sleep(1)
        print("Weather history has been generated")
        return data[0]
    except Exception:
        print("Unknown error while processing, processing it again.")
        self.__retry = self.__retry + 1
        if self.__retry <= self.__max_retry:
            return self.fetch()
def weatherDragons(coordinates):
    # Call the weather API and get some weather info.
    frequency = 24
    start_date = '01-OCT-2018'  # '01-JAN-2018'
    end_date = '01-NOV-2018'
    api_key = '49ba24e2b3b8412a9e501452200811'
    # Build a "lat,lon" query string (coordinates is assumed to be (lon, lat)).
    location_list = [str(coordinates[1]) + "," + str(coordinates[0])]
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=False, store_df=True)
    # print(hist_weather_data[0])
    data = hist_weather_data[0]
    data['simple_date'] = pd.to_datetime(data['date_time']).dt.to_period('M')
    print(data)
    # TODO: group by simple_date and take the sum / max of some columns.
    print(data.columns)
good_zips.remove('07189')
good_zips.remove('07842')
good_zips.remove('07607')
good_zips.remove('08878')

# Find out where we left off when switching api keys.
files = 0
file_list = list()
for file in os.listdir(dir):
    if file.endswith(".csv"):
        files += 1
        file_list.append(file.replace('.csv', ''))

# Print to screen to see.
print("# of files: " + str(files))

# Get daily historical weather data for all the zip codes.
frequency = 24
start_date = '1-JAN-2009'
end_date = '31-MAR-2020'
api_key = 'cbb45e1ebda64725a11200528201004'
location_list = good_zips
hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                       end_date, frequency,
                                       location_label=False, export_csv=True,
                                       store_df=True)
def predict():
    '''
    For rendering results on the HTML GUI.
    '''
    int_features = [x for x in request.form.values()]
    print(int_features)
    date = int_features[0]
    group = int_features[1]
    converted_datetime = pd.to_datetime(date).date()

    # Number of days in the month the user entered; calendar.monthrange
    # handles month lengths and leap years correctly.
    def dateaddition(converted_datetime):
        import calendar
        return calendar.monthrange(converted_datetime.year,
                                   converted_datetime.month)[1]

    # Converting the dates to strings to pass to the weather-collecting API.
    start_date = converted_datetime.strftime("%d-%b-%Y")
    # The end date is the last day of the entered month, unless the user
    # entered the present month, in which case it is today.
    end_date = converted_datetime + timedelta(dateaddition(converted_datetime) - 1)
    if end_date > datetime.now().date():
        end_date = datetime.now().date()
    end_date = end_date.strftime("%d-%b-%Y")

    # This is an API call to collect the necessary weather data we need.
    frequency = 24
    api_key = '12b2c18a34194a8ca93113127200405'
    location_list = ['Mlawa']
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)

    # The weather data is stored in the file "Mlawa.csv", hence we read from that.
    monthly_weather_data = pd.read_csv("Mlawa.csv")

    def final_weather(monthly_weather_data):
        # Dropping the unnecessary columns.
        monthly_weather_data = monthly_weather_data.drop([
            "date_time", "totalSnow_cm", "sunHour", "uvIndex.1", "uvIndex",
            "moon_illumination", "moonrise", "moonset", "sunrise", "DewPointC",
            "sunset", "WindChillC", "WindGustKmph", "precipMM", "pressure",
            "visibility", "winddirDegree", "windspeedKmph", "tempC"
        ], axis=1)
        monthly_weather_data["avg_temp"] = (
            monthly_weather_data["maxtempC"]
            + monthly_weather_data["mintempC"]) / 2
        monthly_weather_data = monthly_weather_data.drop(
            ["maxtempC", "mintempC"], axis=1)
        # Rearranging the data.
        monthly_weather_data = monthly_weather_data[[
            "avg_temp", "FeelsLikeC", "HeatIndexC", "cloudcover", "humidity",
        ]]

        def mean_data(data):
            values = []
            for key, value in data.items():
                values.append(value.mean())
                print(key)
            return values

        # Categorising the heat-index data into three different types.
        def cat_heat(heatindex):
            if heatindex < -2:
                return 0
            elif -1 <= heatindex <= 14:
                return 1
            else:
                return 2

        # Categorising the cloud cover into 4 different types.
        def cat_cloud(cloudcover):
            if cloudcover < 25:
                return 0
            elif 25 <= cloudcover < 50:
                return 1
            elif 50 <= cloudcover < 75:
                return 2
            else:
                return 3

        monthly_averages = np.around(mean_data(monthly_weather_data), 2)
        monthly_averages[2] = cat_heat(monthly_averages[2])
        monthly_averages[3] = cat_cloud(monthly_averages[3])
        return monthly_averages

    monthly_weather_data = final_weather(monthly_weather_data)
    print(monthly_weather_data)

    final_data = pd.read_csv("GROUP_OF_ITEMS_FINAL/" + group.upper() + ".csv")
    final_data = final_data.drop(["Unnamed: 0"], axis=1)
    final_data["ishol/week"] = 9
    final_data["group"] = group
    final_data["monthly_Avgtemp"] = monthly_weather_data[0]
    final_data["monthly_avg_FeelsLikeC"] = monthly_weather_data[1]
    final_data["monthly_avg_HeatIndexC"] = monthly_weather_data[2]
    final_data["monthly_avg_cloudcover"] = monthly_weather_data[3]
    final_data["monthly_avg_humidity"] = monthly_weather_data[4]

    # Pick the model trained for this product group.
    if group in ("ALCOHOL", "KETCH_CONCETRATE_MUSTARD_MAJO_HORSERADISH",
                 "SPICES", "GENERAL", "BREAD", "CHEWING_GUM_LOLIPOPS",
                 "GENERAL_FOOD"):
        loaded_model = load_model("MODELS/gb")
    elif group in ("COFFEE TEA", "CIGARETTES", "CHIPS_FLAKES",
                   "ICE_CREAMS_FROZEN", "POULTRY", "SWEETS"):
        loaded_model = load_model("MODELS/extreme_gb")
    elif group in ("GROATS_RICE_PASTA", "OCCASIONAL"):
        loaded_model = load_model("MODELS/adab")
    elif group in ("CHEMISTRY", "GENERAL_ITEMS", "VEGETABLES"):
        loaded_model = load_model("MODELS/rf")
    elif group == "DAIRY_CHESSE":
        loaded_model = load_model("MODELS/catb")

    pred = predict_model(loaded_model, data=final_data)
    # Predictions are on a log scale, so exponentiate and round them.
    final_sales = np.round(np.exp(pred["Label"]), 0)
    pred['Label'] = final_sales
    output = pred
    s = ""
    for index, row in output.iterrows():
        s += 'Quantity of product {} predicted is {}.'.format(
            row['name'], row['Label'])
        s += "\n\n\n"
    print(s)
    return render_template('index.html', prediction_text=s)
def predict_api():
    '''
    For direct API calls through requests.
    '''
    data = request.get_json(force=True)
    # Enter the month and year for which you want to predict the sales of the
    # product, e.g. 02-2020 for FEB 2020.
    date = data['Date']
    converted_datetime = pd.to_datetime(date).date()

    # Number of days in the entered month; calendar.monthrange handles month
    # lengths and leap years correctly.
    def dateaddition(converted_datetime):
        import calendar
        return calendar.monthrange(converted_datetime.year,
                                   converted_datetime.month)[1]

    end_date = (converted_datetime
                + timedelta(dateaddition(converted_datetime) - 1)).strftime("%d-%b-%Y")
    start_date = converted_datetime.strftime("%d-%b-%Y")
    print(start_date, " ", end_date)

    frequency = 24
    api_key = '12b2c18a34194a8ca93113127200405'
    location_list = ['Mlawa']
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)

    # Note: this reads a pre-existing monthly file rather than the freshly
    # exported Mlawa.csv.
    monthly_weather_data = pd.read_csv("Month_Weather_data.csv")
    monthly_weather_data = monthly_weather_data.drop([
        "date_time", "totalSnow_cm", "sunHour", "uvIndex.1", "uvIndex",
        "moon_illumination", "moonrise", "moonset", "sunrise", "DewPointC",
        "sunset", "WindChillC", "WindGustKmph", "precipMM", "pressure",
        "visibility", "winddirDegree", "windspeedKmph", "tempC"
    ], axis=1)
    monthly_weather_data["avg_temp"] = (monthly_weather_data["maxtempC"]
                                        + monthly_weather_data["mintempC"]) / 2
    monthly_weather_data = monthly_weather_data.drop(["maxtempC", "mintempC"],
                                                     axis=1)
    monthly_weather_data = monthly_weather_data[[
        "avg_temp", "FeelsLikeC", "HeatIndexC", "cloudcover", "humidity",
    ]]

    # Categorise the heat index into three bands.
    def cat_heat(heatindex):
        if heatindex < -2:
            return 0
        elif -1 <= heatindex <= 14:
            return 1
        else:
            return 2

    # Categorise the cloud cover into four bands.
    def cat_cloud(cloudcover):
        if cloudcover < 25:
            return 0
        elif 25 <= cloudcover < 50:
            return 1
        elif 50 <= cloudcover < 75:
            return 2
        else:
            return 3

    def mean_data(data):
        values = []
        for key, value in data.items():
            values.append(value.mean())
            print(key)
        return values

    monthly_averages = np.around(mean_data(monthly_weather_data), 2)
    monthly_averages[2] = cat_heat(monthly_averages[2])
    monthly_averages[3] = cat_cloud(monthly_averages[3])
    print(monthly_averages)

    loaded_model = load_model("Final_Mod")
    unseen_data = pd.read_csv("GROUP_OF_DATASETS/SWEETS.csv")
    prediction = predict_model(loaded_model, data=unseen_data.head(5))
    # A DataFrame is not directly JSON-serialisable, so convert it to records.
    output = prediction.to_dict(orient='records')
    print(output)
    return jsonify(output)
def getHistoricalData(location):
    retrieve_hist_data(api_key, [location], start_date, end_date, frequency,
                       location_label=False, export_csv=True, store_df=False)
# Ignore all future warnings.
simplefilter(action='ignore')

app = Flask(__name__, template_folder='template')
model = pickle.load(open('models/model.pkl', 'rb'))

START_DATE = '15-Nov-2019'
END_DATE = '15-FEB-2020'
API_KEY = 'b1ec70731e66454d9ec62250201602'
LOCATION_LIST = ['pune']
FREQUENCY = 24  # in hours; daily assumed, retrieve_hist_data requires this argument
hist_weather_data = retrieve_hist_data(API_KEY, LOCATION_LIST, START_DATE,
                                       END_DATE, FREQUENCY,
                                       location_label=False, export_csv=True,
                                       store_df=True)


@app.route('/')
def app_status():
    return "Application is up and running."


@app.route('/form')
def show_form():
    return render_template('form.html')
def main():
    dropbox_api_key = os.environ['DROPBOX_ED_API']
    dbx = dropbox.Dropbox(dropbox_api_key)
    dbx.users_get_current_account()
    print('connected to dropbox')

    df = pd.read_csv(
        'https://www.dropbox.com/s/keafvwlkboedkdm/jghDailyVisits.csv?dl=1')
    df.ds = pd.to_datetime(df.ds)
    print('jgh daily visits loaded')

    stat_days_df = pd.read_csv(
        'https://www.dropbox.com/s/hj3byufwtypi8d3/statdays.csv?dl=1')
    ramq_stat_days = stat_days_df['RAMQ'].dropna()
    jgh_stat_days = stat_days_df['JGH'].dropna()
    ramq_stat_days = pd.DataFrame({
        'holiday': 'ramq',
        'ds': ramq_stat_days.to_list(),
        'lower_window': 0,
        'upper_window': 1,
    })
    jgh_stat_days = pd.DataFrame({
        'holiday': 'jgh',
        'ds': jgh_stat_days.to_list(),
        'lower_window': 0,
        'upper_window': 1,
    })
    holidays = pd.concat((ramq_stat_days, jgh_stat_days))
    print('stat days prepared')

    # Get all Montreal Canadiens games in our time frame (2012-present),
    # originally pulled from the NHL's REST API.
    hockey_df = pd.read_csv(
        'https://www.dropbox.com/s/g00ih64ndksshwr/montrealHockey.csv?dl=1')
    hockey_df.dateTime = pd.to_datetime(hockey_df.dateTime)
    # Separate the games by home or away.
    home_hockey_df = hockey_df[hockey_df['homeTeam'] == 'Montreal Canadiens']
    away_hockey_df = hockey_df[hockey_df['awayTeam'] == 'Montreal Canadiens']
    # Format them as Prophet expects.
    home_hockey = pd.DataFrame({
        'holiday': 'homeHockey',
        'ds': pd.to_datetime(home_hockey_df['dateTime']),
        'lower_window': 0,
        'upper_window': 1,
    })
    away_hockey = pd.DataFrame({
        'holiday': 'awayHockey',
        'ds': pd.to_datetime(away_hockey_df['dateTime']),
        'lower_window': 0,
        'upper_window': 1,
    })
    # Add the hockey games to our stat day data.
    holidays = pd.concat(
        (home_hockey, away_hockey, ramq_stat_days, jgh_stat_days))
    print('hockey games added')

    # Load our weather data from Dropbox.
    weather_df = pd.read_csv(
        'https://www.dropbox.com/s/688u8aw6k0eqzqb/montrealDailyWeather.csv?dl=1')
    weather_df['ds'] = pd.to_datetime(weather_df['ds'])
    print('old weather added')

    # Fetch whichever days are missing, plus 2 days into the future (the
    # forecast), which is the maximum the WWO API lets us access this way.
    # The data is saved as a CSV file called 'Montreal.csv'.
    frequency = 24
    start_date = (weather_df.ds.max()
                  - timedelta(days=2)).date().strftime("%d-%b-%Y").upper()
    end_date = (datetime.now(pytz.utc)).astimezone(
        pytz.timezone('US/Eastern')).date().strftime("%d-%b-%Y").upper()
    api_key = os.environ['WEATHER_API']
    location_list = ['Montreal']
    retrieve_hist_data(api_key, location_list, start_date, end_date, frequency,
                       location_label=False, export_csv=True, store_df=False)
    missing_weather_df = pd.read_csv('Montreal.csv')
    missing_weather_df['date_time'] = pd.to_datetime(
        missing_weather_df['date_time'])
    missing_weather_df = missing_weather_df.drop(
        ['moonrise', 'moonset', 'sunrise', 'sunset'], axis=1)
    missing_weather_df = missing_weather_df.rename(columns={"date_time": "ds"})
    print('new weather fetched')

    # Concatenate the old weather data with the missing weather data.
    final_weather_df = pd.concat([weather_df, missing_weather_df])
    final_weather_df = final_weather_df.drop_duplicates(subset='ds',
                                                        keep="last")
    final_weather_df.to_csv('montrealDailyWeather.csv', index=False)
    upload(dbx, 'montrealDailyWeather.csv', '', '', 'montrealDailyWeather.csv',
           overwrite=True)

    regressors = final_weather_df.columns.to_list()
    regressors.remove('ds')
    df.ds = pd.to_datetime(df.ds)
    final_df = pd.merge(df, final_weather_df, on='ds')
    print('starting to build model')

    # Instantiate our model with our stat day and hockey variables saved in
    # the 'holidays' dataframe.
    m = Prophet(holidays=holidays, seasonality_mode='multiplicative',
                changepoint_prior_scale=0.5, changepoint_range=0.85)
    # Add Prophet's built-in holidays for Canada.
    m.add_country_holidays(country_name='CA')
    # Add our 20 weather variables as regressors.
    for regressor in regressors:
        m.add_regressor(regressor)
    # Fit our model to our data.
    m.fit(final_df)
    print('finished fitting model')

    import pickle
    # Pickle the Prophet model and push it to Dropbox.
    pkl_path = "jgh-prophet-daily.pkl"
    with open(pkl_path, "wb") as f:
        pickle.dump(m, f)
    upload(dbx, 'jgh-prophet-daily.pkl', '', '', 'jgh-prophet-daily.pkl',
           overwrite=True)
def main():
    dropbox_api_key = os.environ['DROPBOX_ED_API']
    dbx = dropbox.Dropbox(dropbox_api_key)
    dbx.users_get_current_account()
    print('connected to dropbox')

    df = pd.read_csv(
        'https://www.dropbox.com/s/fqsdx1ovqsljwqa/jghOccupancy.csv?dl=1')
    df.ds = pd.to_datetime(df.ds)
    print('jgh occupancy loaded')

    stat_days_df = pd.read_csv(
        'https://www.dropbox.com/s/hj3byufwtypi8d3/statdays.csv?dl=1')

    # Load our weather data from Dropbox.
    weather_df = pd.read_csv(
        'https://www.dropbox.com/s/omkh9t1lrg5k914/montrealHourlyWeather.csv?dl=1')
    weather_df['ds'] = pd.to_datetime(weather_df['ds'])
    print('old weather added')

    # Fetch whichever days are missing, plus 2 days into the future (the
    # forecast), which is the maximum the WWO API lets us access this way.
    # The data is saved as a CSV file called 'Montreal.csv'.
    frequency = 1
    start_date = (weather_df.ds.max()
                  - timedelta(days=2)).date().strftime("%d-%b-%Y").upper()
    end_date = (datetime.now(pytz.utc)).astimezone(
        pytz.timezone('US/Eastern')).date().strftime("%d-%b-%Y").upper()
    api_key = os.environ['WEATHER_API']
    location_list = ['Montreal']
    retrieve_hist_data(api_key, location_list, start_date, end_date, frequency,
                       location_label=False, export_csv=True, store_df=False)
    missing_weather_df = pd.read_csv('Montreal.csv')
    missing_weather_df['date_time'] = pd.to_datetime(
        missing_weather_df['date_time'])
    missing_weather_df = missing_weather_df.drop(
        ['moonrise', 'moonset', 'sunrise', 'sunset'], axis=1)
    missing_weather_df = missing_weather_df.rename(columns={"date_time": "ds"})
    print('new weather fetched')

    # Concatenate the old weather data with the missing weather data.
    final_weather_df = pd.concat([weather_df, missing_weather_df])
    final_weather_df = final_weather_df.drop_duplicates(subset='ds',
                                                        keep="last")
    final_weather_df.to_csv('montrealHourlyWeather.csv', index=False)
    upload(dbx, 'montrealHourlyWeather.csv', '', '',
           'montrealHourlyWeather.csv', overwrite=True)

    regressors = final_weather_df.columns.to_list()
    regressors.remove('ds')
    df.ds = pd.to_datetime(df.ds)
    final_df = pd.merge(df, final_weather_df, on='ds')
    print('starting to build model')

    # Instantiate our model.
    m = Prophet(seasonality_mode='multiplicative',
                changepoint_prior_scale=0.5, changepoint_range=0.85)
    # Add Prophet's built-in holidays for Canada.
    m.add_country_holidays(country_name='CA')
    # Add our 20 weather variables as regressors.
    for regressor in regressors:
        m.add_regressor(regressor)
    # Fit our model to our data.
    m.fit(final_df)
    print('finished fitting model')

    import pickle
    # Pickle the Prophet model and push it to Dropbox.
    pkl_path = "jgh-prophet-occupancy.pkl"
    with open(pkl_path, "wb") as f:
        pickle.dump(m, f)
    upload(dbx, 'jgh-prophet-occupancy.pkl', '', '',
           'jgh-prophet-occupancy.pkl', overwrite=True)
if len(sys.argv[1:]) > 3:
    sure = input("You have entered more than three zip codes, are you certain "
                 "you'd like to proceed? y/n: ")
    if sure.lower() in ("n", "no"):
        sys.exit()

count = 0
for code in locations:
    if len(code) != 5:
        count += 1
if count > 0:
    raise ValueError(
        "Not a valid zip code; locations array contains a value whose "
        "length is not 5")

with open("lexicon.txt", "a") as f:
    for code in locations:
        input_ = input(f"{code} : location ")
        f.write(f"\n{code} : {input_}")

hist_weather_data = retrieve_hist_data(WEATHER_KEY,
                                       location_list=locations,
                                       start_date=start_date,
                                       end_date=end_date,
                                       frequency=frequency,
                                       location_label=False,
                                       export_csv=True,
                                       store_df=True)
def retrieve_data(config=config):
    deaths_df = get_melt_clean(
        url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
        value_name='deaths',
        use_local=config['use_local'],
        data_dir=config['data_dir'])
    cases_df = get_melt_clean(
        url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv',
        value_name='cases',
        use_local=config['use_local'],
        data_dir=config['data_dir'])
    recovered_df = get_melt_clean(
        url='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv',
        value_name='recoveries',
        use_local=config['use_local'],
        data_dir=config['data_dir'])

    join_cols = config['index_col']
    full_df = pd.merge(cases_df, deaths_df, on=join_cols)
    full_df = pd.merge(full_df, recovered_df, on=join_cols)

    # Keep provinces that pass the case/death thresholds.
    filtered_df = filter_provinces(full_df, 1, 50)
    filtered_df['new_deaths'] = filtered_df.groupby(
        ['Country/Region', 'Province/State'])['deaths'].diff().clip(lower=0)
    filtered_df['new_cases'] = filtered_df.groupby(
        ['Country/Region', 'Province/State'])['cases'].diff().clip(lower=0)

    # Calculate active cases.
    filtered_df['active_cases'] = (filtered_df['cases']
                                   - filtered_df['deaths']
                                   - filtered_df['recoveries'])
    filtered_df['new_cases_as_percent_of_active'] = (
        filtered_df['new_cases'] / filtered_df['active_cases'])

    # filtered_df = add_dates_to_index(filtered_df, -14, join_cols)
    filtered_df = filtered_df.set_index(join_cols).select_dtypes(
        include='number').fillna(0)
    # Change the index to lat/long and date for use with the weather data.
    filtered_df.reset_index(['Country/Region', 'Province/State'], inplace=True)

    # Load the local weather cache and check for missing location/dates.
    try:
        hist_weather_df_cache = pd.read_csv(config['data_dir'] +
                                            '/wwo_cache.csv')
        hist_weather_df_cache['date'] = pd.to_datetime(
            hist_weather_df_cache['date'])
        hist_weather_df_cache = hist_weather_df_cache.set_index(
            ['Lat', 'Long', 'date'])
        assert hist_weather_df_cache.index.is_unique
    except FileNotFoundError:
        hist_weather_df_cache = pd.DataFrame()

    # Get the unique missing (lat, long, date) tuples.
    missing_indexes = filtered_df.index.difference(hist_weather_df_cache.index)
    if len(missing_indexes) > 0:
        missing_val_df = filtered_df.loc[missing_indexes]
        missing_val_agg = missing_val_df.reset_index().groupby(
            ['Lat', 'Long']).aggregate(start_date=('date', 'min'),
                                       end_date=('date', 'max')).reset_index()
        missing_val_agg['lat_long_string'] = [
            str(x) + ',' + str(y)
            for x, y in zip(missing_val_agg['Lat'], missing_val_agg['Long'])
        ]
        # Retrieve the missing weather data.
        hist_weather_list = []
        for row in missing_val_agg.itertuples():
            try:
                hist_weather_list_loc = retrieve_hist_data(
                    api_key=config['weather_api_key'],
                    location_list=[row.lat_long_string],
                    start_date=row.start_date,
                    end_date=row.end_date,
                    frequency=24,
                    location_label=False,
                    export_csv=False,
                    store_df=True,
                    response_cache_path='woo_cache')
                hist_weather_list.extend(hist_weather_list_loc)
            except requests.HTTPError:
                print("exceeded daily request limit, saving retrieved and exiting")
                break
            except socket.timeout:
                print("timed out, saving retrieved and exiting")
                break
            except urllib.error.URLError:
                print("network issue, saving retrieved and exiting")
                break
        # TODO: check cached responses even when there is a current connection
        # failure. There are some mixed-up dependencies though: caches are
        # checked inside wwo_hist, but this loop feeds it requests one by one.
        if len(hist_weather_list) > 0:
            hist_weather_df_new = pd.concat(hist_weather_list)
            hist_weather_df_new['Lat'] = hist_weather_df_new['location'].apply(
                lambda x: x.split(',')[0]).astype('float')
            hist_weather_df_new['Long'] = hist_weather_df_new['location'].apply(
                lambda x: x.split(',')[1]).astype('float')
            hist_weather_df_new.rename(columns={'date_time': 'date'},
                                       inplace=True)
            hist_weather_df_new.set_index(['Lat', 'Long', 'date'],
                                          inplace=True)
            hist_weather_df = pd.concat(
                [hist_weather_df_cache, hist_weather_df_new],
                verify_integrity=True)
            hist_weather_df.to_csv(config['data_dir'] + '/wwo_cache.csv')
        else:
            print("unable to retrieve any new weather data.")
            hist_weather_df = hist_weather_df_cache
    else:
        # Nothing missing; use the cache as-is.
        hist_weather_df = hist_weather_df_cache

    hist_weather_df = hist_weather_df.infer_objects()
    joined_df = filtered_df.join(hist_weather_df, how='inner')
    joined_df.to_csv(config['data_dir'] + '/prepped_data.csv')
    joined_df.to_pickle(config['data_dir'] + '/prepped_data.pkl')
def forecast(city_name):
    os.chdir(r"C:\Users\Administrator\Desktop")
    frequency = 3
    start_date = '1-JAN-2019'
    end_date = '1-JAN-2020'
    api_key = 'e60a5f5f96574a33947210842201502'
    # city_name = input('Enter city name: ')
    location_list = [city_name]
    hist_weather_data = retrieve_hist_data(api_key, location_list, start_date,
                                           end_date, frequency,
                                           location_label=False,
                                           export_csv=True, store_df=True)

    path = "C:\\Users\\Administrator\\Desktop\\"
    data = pd.read_csv(path + city_name + ".csv")
    # Drop the unnecessary columns in the data.
    data = data.drop([
        "date_time", 'maxtempC', 'DewPointC', 'mintempC', 'sunHour',
        'moon_illumination', 'moonrise', 'moonset', 'sunrise', 'sunset',
        'HeatIndexC', 'WindChillC', 'WindGustKmph', 'totalSnow_cm'
    ], axis=1, inplace=False)
    data.to_csv(city_name + '.csv')

    params = {
        'access_key': '7f31a3c1baed8dddc5b06a0448f4b534',
        'query': city_name
    }
    api_result = requests.get('http://api.weatherstack.com/current', params)
    api_response = api_result.json()
    print('\n')
    print('Given City Name: %s' % api_response['location']['name'])
    # These variables a to k can be returned to get the current details.
    print('Current temperature is %d℃' % api_response['current']['temperature'])
    a = api_response['current']['temperature']
    print('Current Humidity is %d' % api_response['current']['humidity'])
    b = api_response['current']['humidity']
    print('Current Pressure is %d Pascal' % api_response['current']['pressure'])
    c = api_response['current']['pressure']
    print('Current Cloud Cover is %d' % api_response['current']['cloudcover'])
    d = api_response['current']['cloudcover']
    print('Current Precipitation is %d' % api_response['current']['precip'])
    e = api_response['current']['precip']
    print('Current Visibility is %d' % api_response['current']['visibility'])
    f = api_response['current']['visibility']
    print('Current Wind Speed is %d' % api_response['current']['wind_speed'])
    g = api_response['current']['wind_speed']
    print('Current Feels Like is %d' % api_response['current']['feelslike'])
    h = api_response['current']['feelslike']
    print('Current Wind Direction is %s' % api_response['current']['wind_dir'])
    i = api_response['current']['wind_dir']
    print('Current UV Index is %d' % api_response['current']['uv_index'])
    j = api_response['current']['uv_index']
    print('Current Wind Degree is %d' % api_response['current']['wind_degree'])
    k = api_response['current']['wind_degree']

    # Read the trimmed data back from the CSV file.
    data = pd.read_csv(path + city_name + ".csv")

    # For pressure.
    X = data.drop(['pressure'], axis=1)
    Y = data['pressure'].values.reshape(-1, 1)
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.01)
    model = lm().fit(x_train, y_train)
    pressure = model.predict(x_test)
    print(pressure, 'This is the pressure in pascal for the input')

    # For temperature.
    X = data.drop(['tempC'], axis=1)
    Y = data['tempC'].values.reshape(-1, 1)
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.01)
    model = lm().fit(x_train, y_train)
    temp = model.predict(x_test)
    print(temp, 'This is the temperature in degrees C for the input')

    # For humidity.
    X = data.drop(['humidity'], axis=1)
    Y = data['humidity'].values.reshape(-1, 1)
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.01)
    model = lm().fit(x_train, y_train)
    hum = model.predict(x_test)
    print(hum, 'This is the humidity for the input')

    return str(temp), str(pressure), str(hum)
from wwo_hist import retrieve_hist_data
import pandas as pd
import plotly.express as px

API_KEY = ''
location_list = ['beirut']
start_date = '01-JAN-2009'
end_date = '10-OCT-2019'
freq = 24

data = retrieve_hist_data(API_KEY, location_list, start_date, end_date, freq,
                          location_label=False, export_csv=True,
                          store_df=True)
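# A hedged follow-on sketch: plot daily temperature from the exported CSV.
# The 'date_time' and 'tempC' column names follow the other wwo_hist exports
# in this collection; 'beirut.csv' is the file export_csv writes above.
df = pd.read_csv('beirut.csv')
fig = px.line(df, x='date_time', y='tempC', title='Beirut daily temperature')
fig.show()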