def weather(start_date: str, end_date: str, zip_code: list, frequency: int, location_label=False, export_csv=True, store_df=True, api_key='62c4f496efb147c1b2160953202406') -> pd.DataFrame:
    """Fetch historical weather for the given zip codes and return selected columns.

    Downloads one CSV per zip code via ``retrieve_hist_data``, reads each file
    back in, deletes it, and returns the concatenated data for all codes.

    Args:
        start_date (str): start date inclusive; '01-Jan-2020'
        end_date (str): end date inclusive; '02-Jan-2020'
        zip_code (list): list of zip codes
        frequency (int): frequency in hours, usually 24
        location_label (bool, optional): passed through to retrieve_hist_data. Defaults to False.
        export_csv (bool, optional): must stay True -- the exported CSV files
            are how this function receives the data. Defaults to True.
        store_df (bool, optional): passed through to retrieve_hist_data. Defaults to True.
        api_key (str, optional): obtained from website; valid for 90 days.

    Returns:
        pd.DataFrame: location, date_time, tempC, humidity, windspeedKmph,
        pressure and uvIndex rows for every requested zip code.
    """
    # zip_code is already a list; the original wrapped it in another list,
    # which handed retrieve_hist_data a list-of-lists instead of locations.
    retrieve_hist_data(api_key, zip_code, start_date, end_date, frequency,
                       location_label=location_label, export_csv=True, store_df=store_df)

    frames = []
    for code in zip_code:
        csv_name = code + '.csv'
        frames.append(pd.read_csv(csv_name, header=0))
        os.remove(csv_name)
        print(f'deleted file {csv_name}')

    # The original kept only the last code's data; return all of them.
    data = pd.concat(frames, ignore_index=True)
    return data[['location', 'date_time', 'tempC', 'humidity', 'windspeedKmph', 'pressure', 'uvIndex']]  # choose which to return
# --- Example 2 ---
def get_weather_data(config):
    """Download historical weather described by *config*; the CSV is written
    to the current directory (store_df=False, so nothing is kept in memory)."""
    key = config['api_key']
    locations = config['location_list']
    retrieve_hist_data(key, locations,
                       config['start_date'], config['end_date'],
                       config['frequency'],
                       location_label=config['location_label'],
                       export_csv=True, store_df=False)
    return None
# --- Example 3 ---
def get_weather():
    """Flask endpoint: average humidity, temperature and rainfall for the
    requested location over roughly the last two months.

    Expects a JSON body like ``{"location": "<city>"}`` and returns the three
    averages as a single space-separated string.
    """
    import calendar

    frequency = 3
    now = datetime.datetime.now()

    # Start two calendar months back, rolling the year over when needed.
    # (The original computed datex.month - 2 directly, which produced
    # month 0 / -1 -- an invalid date string -- in January and February.)
    start_month = now.month - 2
    start_year = now.year
    if start_month <= 0:
        start_month += 12
        start_year -= 1
    # Clamp the day so e.g. Aug 31 maps to Jun 30, not the invalid Jun 31.
    start_day = min(now.day, calendar.monthrange(start_year, start_month)[1])

    start_date = f'{start_month}-{start_day}-{start_year}'
    end_date = f'{now.month}-{now.day}-{now.year}'
    api_key = '06909010f11242eaae354051202907'
    location = request.get_json()
    location_list = [location["location"]]

    retrieve_hist_data(api_key,
                       location_list,
                       start_date,
                       end_date,
                       frequency,
                       location_label=False,
                       export_csv=True,
                       store_df=True)

    # retrieve_hist_data exports '<location>.csv'; read it back.
    data = pd.read_csv(str(location_list[0]) + ".csv")
    # NOTE(review): max/min/feels-like averages are computed but never used
    # in the response -- kept for parity with the original.
    maxTempValue = round(data["maxtempC"].mean(axis=0), 2)
    minTempValue = round(data["mintempC"].mean(axis=0), 2)
    feelsLikeValue = round(data["FeelsLikeC"].mean(axis=0), 2)
    humidityValue = round(data["humidity"].mean(axis=0), 2)
    rainfallValue = round(data["precipMM"].mean(axis=0), 2)
    tempValue = round(data["tempC"].mean(axis=0), 2)
    return "{}".format(str(humidityValue) + " " + str(tempValue) + " " + str(rainfallValue))
# --- Example 4 ---
def prp():
    """Flask endpoint: run the PRP model for a requested date and return the
    result as a JSON string.

    Query params:
        date_string: date to run the PRP model for.
        zipcode_str: requested zip code (currently overridden -- see below).
    """
    date = request.args.get('date_string')
    zipcode = request.args.get('zipcode_str')

    os.chdir("/var/www/FlaskApp/FlaskApp")

    frequency = 24
    start_date = '01-JAN-2010'
    end_date = '01-JAN-2021'
    api_key = '0909c9292f294476aba41920211701'
    location_list = ['97603']
    # NOTE(review): the requested zipcode is overridden with a fixed value --
    # confirm whether arbitrary zip codes should be supported here.
    zipcode = "97603"

    retrieve_hist_data(api_key,
                       location_list,
                       start_date,
                       end_date,
                       frequency,
                       location_label=False,
                       export_csv=True,
                       store_df=True)

    # prplist must exist on the failure path too -- the original raised
    # NameError at the dict construction below when the CSV was missing.
    prplist = []
    if os.path.isfile(f"/var/www/FlaskApp/FlaskApp/{zipcode}.csv"):
        data3 = "PRP API call Success"
        prplist = plantrec.prpmain(date)
    else:
        data3 = "PRP API call Failure"

    temp = {"date": date, "zip": zipcode, "prp": prplist}
    return json.dumps(temp)
 def call_api(self):
     """Fetch historical weather for this object's configured query.

     Relies on retrieve_hist_data's side effects (CSV export / stored
     dataframe); the return value is not used.
     """
     query = dict(
         api_key=self.api_key,
         location_list=self.location_list,
         start_date=self.start_date,
         end_date=self.end_date,
         frequency=self.frequency,
         location_label=False,
         export_csv=True,
         store_df=True,
     )
     hist_weather_data = retrieve_hist_data(**query)
# --- Example 6 ---
def get_wwo_weather(lat_lon_list, start_date, end_date, frequency, key):
    """Fetch historical weather via the World Weather Online wrapper.

    Args:
        lat_lon_list: list of locations accepted by retrieve_hist_data.
        start_date / end_date: inclusive date-range strings.
        frequency: sample frequency in hours.
        key: WWO API key.

    Returns:
        The list of dataframes produced by retrieve_hist_data (store_df=True).
    """
    hist_weather_data = retrieve_hist_data(key, lat_lon_list, start_date, end_date, frequency=frequency,
                                           location_label=False, export_csv=True, store_df=True)
    # NOTE(review): everything below is dead work -- `data` (with the dropped
    # columns and the wind_u/wind_v components) is built but never returned;
    # the function returns the raw hist_weather_data instead. Also, wind_u/v
    # are derived from wind *speed*, whereas cos/sin decomposition is normally
    # applied to wind *direction* -- confirm intent before relying on this.
    data = hist_weather_data[0].drop(
        ['sunrise', 'sunset', 'moonrise', 'moonset', 'moon_illumination', 'FeelsLikeC', 'HeatIndexC', 'WindChillC'],
        axis=1)
    wu = pd.DataFrame(np.cos(data['windspeedKmph'].to_numpy().astype(np.float32)))
    wv = pd.DataFrame(np.sin(data['windspeedKmph'].to_numpy().astype(np.float32)))
    data['wind_u'] = wu
    data['wind_v'] = wv
    return hist_weather_data
def get_clima():
    """Download hourly Mexico City weather (Jan 2014 - Aug 2019), drop the
    unused columns and return the data with Spanish column names."""
    os.chdir("../Data")
    hist_weather_data = retrieve_hist_data(
        '28f7f02aa28d4afe9dc215223190509',  # api key
        ['mexico_city'],
        '01-JAN-2014',
        '30-AUG-2019',
        1,  # hourly samples
        location_label=False,
        export_csv=True,
        store_df=True,
    )
    clima = pd.read_csv('../Data/mexico_city.csv')
    unwanted = ['maxtempC', 'mintempC', 'totalSnow_cm', 'sunHour', 'uvIndex.1',
                'moonrise', 'moonset', 'sunrise', 'sunset', 'HeatIndexC',
                'WindChillC', 'WindGustKmph']
    clima.drop(columns=unwanted, inplace=True)
    clima.columns = ['fecha_hechos', 'uv', 'ilu_luna', 'punto_rocio',
                     'temp_sentir', 'nubosidad', 'humedad', 'precipitacion',
                     'presion', 'temperatura', 'visibilidad', 'dir_viento',
                     'vel_viento']
    return clima
def weather(start_date: str, end_date: str, zip_codes: list, frequency: int, api_key='f098932734bd498196c175043201708') -> None:
    """Write one CSV per zip code into the meteorological data directory.

    Downloads historical weather via ``retrieve_hist_data`` (which drops a
    temporary ``<zip>.csv`` in the working directory), keeps only the columns
    of interest, saves the result under ``Data Collection/Data/Meteorological``
    and removes the temporary file.

    Args:
        start_date (str): start date inclusive; '01-Jan-2020'
        end_date (str): end date inclusive; '02-Jan-2020'
        zip_codes (list): list of zip codes
        frequency (int): frequency in hours, usually 24
        api_key (str, optional): obtained from website.
    """
    retrieve_hist_data(api_key, zip_codes, start_date, end_date, frequency,
                       location_label=False, export_csv=True, store_df=True)

    columns = ['location', 'date_time', 'tempC', 'humidity',
               'windspeedKmph', 'pressure', 'uvIndex']

    for code in zip_codes:
        temp_csv = code + '.csv'
        data = pd.read_csv(temp_csv, header=0)
        # os.path.join replaces the hand-built Windows path whose raw-string
        # literal ended in a literal double backslash.
        out_path = os.path.join('Data Collection', 'Data', 'Meteorological', temp_csv)
        data[columns].to_csv(out_path)
        os.remove(temp_csv)
# --- Example 9 ---
def get_data(place):
    """Download historical weather for *place* unless its CSV already exists.

    Relies on module-level DATA_URL, api_key, start_date, end_date and
    frequency; retrieve_hist_data writes '<place>.csv' as a side effect.
    """
    filename = place + '.csv'
    # Only hit the API when the data has not already been cached
    # (the original used an empty `if ...: pass / else:` branch).
    if filename not in os.listdir(DATA_URL):
        retrieve_hist_data(api_key,
                           [place],
                           start_date,
                           end_date,
                           frequency,
                           location_label=False,
                           export_csv=True,
                           store_df=True)
# --- Example 10 ---
def load_temp(name, end_date, key, start_date="6-DEC-2009", frequency=24):
    """Fetch historical weather for a single location and return it as a
    dataframe with a clean 0..n-1 index.

    Args:
        name: location understood by the WWO API.
        end_date: inclusive end date string.
        key: WWO API key.
        start_date: inclusive start date string.
        frequency: sample frequency in hours (24 = daily).
    """
    result = retrieve_hist_data(key,
                                [name],
                                start_date,
                                end_date,
                                frequency,
                                location_label=False,
                                export_csv=True,
                                store_df=True)
    frame = result[0].reset_index()
    return frame.drop(["index"], axis=1)  # returns dataframe
# --- Example 11 ---
def get_initial_data_wwo(locations, start_date, end_date):
    """Fetch daily historical weather for *locations*, trying each known API
    key in turn until one succeeds.

    Returns the retrieve_hist_data result on success, or None if every key
    fails with an HTTPError (a message is printed for each failure).
    """
    frequency = 24
    # The original defined three keys but only ever tried the last one, and
    # its except-handler said "try another one" without actually doing so.
    # Key 3 is tried first to preserve the original's primary choice.
    api_keys = ['e9c547b9ae304c07b9834623202301',
                '340dfd23ecc94a4a8c631942202301',
                '1d5cc937de61480aaf903645202201']
    for api_key in api_keys:
        try:
            return retrieve_hist_data(api_key,
                                      locations,
                                      start_date,
                                      end_date,
                                      frequency,
                                      location_label=False,
                                      export_csv=True,
                                      store_df=True)
        except HTTPError:
            print('bad api key, try another one')
    return None
def collecting_data():
    """Download 3-hourly weather history for Rawalpindi from 2015-01-01 up to
    yesterday into the ``theweatherforecast`` directory (CSV side effect)."""
    yesterday = date.today() - timedelta(1)
    os.chdir(r"theweatherforecast")
    # NOTE(review): start_date is ISO-style while end_date uses '%Y-%B-%d'
    # (full month name) -- confirm the API accepts both formats.
    _ = retrieve_hist_data('74a87fc92c714e70aa1112002210101',
                           ['Rawalpindi'],
                           '2015-01-01',
                           yesterday.strftime('%Y-%B-%d'),
                           3,
                           location_label=False,
                           export_csv=True,
                           store_df=True)
# --- Example 13 ---
def get_weather_data(counties, start_date, end_date, frequency, output_dir):
    '''
    counties: (list) of counties, example: ['nyc', 'queens']
    start_date: (str) date, example: '01-JAN-2018'
    end_date: (str) date, example: '31-DEC-2018'
    frequency: (int) frequency of historical data, example: 1 for hourly data
    output_dir: (str) path to directory where you want to store the weather data
    '''
    os.chdir(output_dir)
    api_key = '<your-api-key>'  # enter your api key for wwo
    for county in counties:
        print(f"Getting weather data for {county}")
        retrieve_hist_data(api_key,
                           [county],
                           start_date,
                           end_date,
                           frequency,
                           location_label=False,
                           export_csv=True,
                           store_df=True)
 def fetch(self) -> DataFrame:
     """Fetch this object's configured weather history, retrying on failure.

     Returns:
         The first dataframe from retrieve_hist_data, or None once
         __max_retry consecutive attempts have failed.
     """
     try:
         print("Generating history data for {} from {} to {}, Retry: {}".
               format(self.__location_list, self.__start_date,
                      self.__end_date, self.__retry))
         data = retrieve_hist_data(self.__api_key,
                                   self.__location_list,
                                   self.__start_date,
                                   self.__end_date,
                                   self.__freq,
                                   location_label=False,
                                   export_csv=False,
                                   store_df=True)
         sleep(1)
         # This message was unreachable in the original (placed after return).
         print("Weather history has been Generated")
         return data[0]
     except Exception:
         # Narrowed from a bare `except:`, which also swallowed
         # KeyboardInterrupt/SystemExit.
         print("Unknown Error while Processing, Processing it again.")
         self.__retry = self.__retry + 1
         if self.__retry <= self.__max_retry:
             # Propagate the retry's result; the original discarded it and
             # returned None even when the retry succeeded.
             return self.fetch()
# --- Example 15 ---
def weatherDragons(coordinates):
    """Fetch Oct-Nov 2018 daily weather for a coordinate pair and print the
    data with a month-level date column added.

    coordinates is indexed as (x, y) and queried as "y,x" -- presumably
    (lon, lat) -> "lat,lon"; verify against the caller.
    """
    query = str(coordinates[1]) + "," + str(coordinates[0])
    hist_weather_data = retrieve_hist_data('49ba24e2b3b8412a9e501452200811',
                                           [query],
                                           '01-OCT-2018',  #'01-JAN-2018'
                                           '01-NOV-2018',
                                           24,  # daily samples
                                           location_label=False,
                                           export_csv=False,
                                           store_df=True)

    # print(hist_weather_data[0])
    data = hist_weather_data[0]
    data['simple_date'] = pd.to_datetime(data['date_time']).dt.to_period('M')
    print(data)

    # TODO group by simple_date and make sum / max for some columns
    print(data.columns)
# --- Example 16 ---
# Drop zip codes known to be bad before querying the API.
good_zips.remove('07189')
good_zips.remove('07842')
good_zips.remove('07607')
good_zips.remove('08878')

# Find out where we left off when switching api keys: every CSV already in
# `dir` corresponds to a zip code that has been downloaded.
file_list = [name.replace('.csv', '')
             for name in os.listdir(dir)
             if name.endswith(".csv")]
files = len(file_list)

# print to screen to see
print("# of files: " + str(files))

# Getting forecasted weather data for all the zip codes:
# daily (24h) weather forecasts over the full date range.
frequency = 24
start_date = '1-JAN-2009'
end_date = '31-MAR-2020'
api_key = 'cbb45e1ebda64725a11200528201004'
location_list = good_zips
hist_weather_data = retrieve_hist_data(api_key,
                                       location_list,
                                       start_date,
                                       end_date,
                                       frequency,
                                       location_label=False,
                                       export_csv=True,
                                       store_df=True)
# --- Example 17 ---
def predict():
    '''
    For rendering results on HTML GUI.

    Reads a date and a product group from the submitted form, pulls that
    month's weather for Mlawa, derives monthly weather features, feeds them
    together with the group's item data into the matching saved model and
    renders the predicted quantities.
    '''
    import calendar

    int_features = [x for x in request.form.values()]
    print(int_features)

    date = int_features[0]
    group = int_features[1]

    converted_datetime = pd.to_datetime(date).date()

    def dateaddition(converted_datetime):
        """Number of days in the month of *converted_datetime*.

        calendar.monthrange applies the full Gregorian leap rule; the
        original hand-rolled parity logic returned 30 for July and treated
        every year divisible by 4 as a leap year.
        """
        return calendar.monthrange(converted_datetime.year,
                                   converted_datetime.month)[1]

    # Converting the dates to strings to pass them through the weather API.
    start_date = converted_datetime.strftime("%d-%b-%Y")

    # End date = entered date advanced to the end of its month.
    end_date = pd.to_datetime(converted_datetime +
                              timedelta(dateaddition(converted_datetime) -
                                        1)).date()

    # If the user entered the present month, stop at the present day instead.
    if end_date > datetime.now().date():
        end_date = datetime.now().date().strftime("%d-%b-%Y")
    else:
        end_date = end_date.strftime("%d-%b-%Y")

    # Collect the necessary weather data for the month.
    frequency = 24
    api_key = '12b2c18a34194a8ca93113127200405'
    location_list = ['Mlawa']
    hist_weather_data = retrieve_hist_data(api_key,
                                           location_list,
                                           start_date,
                                           end_date,
                                           frequency,
                                           location_label=False,
                                           export_csv=True,
                                           store_df=True)

    # The weather data is exported to "Mlawa.csv"; read it back in.
    monthly_weather_data = pd.read_csv("Mlawa.csv")

    def final_weather(monthly_weather_data):
        """Reduce the raw weather rows to the 5 monthly feature values the
        models expect: avg_temp, FeelsLikeC, HeatIndexC (categorised),
        cloudcover (categorised), humidity."""
        # Dropping the unnecessary columns.
        monthly_weather_data = monthly_weather_data.drop([
            "date_time", "totalSnow_cm", "sunHour", "uvIndex.1", "uvIndex",
            "moon_illumination", "moonrise", "moonset", "sunrise", "DewPointC",
            "sunset", "WindChillC", "WindGustKmph", "precipMM", "pressure",
            "visibility", "winddirDegree", "windspeedKmph", "tempC"
        ],
                                                         axis=1)
        monthly_weather_data["avg_temp"] = (
            monthly_weather_data["maxtempC"] +
            monthly_weather_data["mintempC"]) / 2
        monthly_weather_data = monthly_weather_data.drop(
            ["maxtempC", "mintempC"], axis=1)
        # Rearranging the data into the column order the models expect.
        monthly_weather_data = monthly_weather_data[[
            "avg_temp",
            "FeelsLikeC",
            "HeatIndexC",
            "cloudcover",
            "humidity",
        ]]

        def mean_data(data):
            """Column-wise means, in column order.

            .items() replaces the deprecated DataFrame.iteritems().
            """
            return [column.mean() for _, column in data.items()]

        # Categorising the heat_index data into three different types.
        def cat_heat(heatindex):
            if heatindex < -2:
                return 0
            elif -1 <= heatindex <= 14:
                return 1
            else:
                return 2

        # Categorising the cloud cover into 4 different types.
        # NOTE(review): the 25-50 and 50-75 bands both map to 1 and no input
        # maps to 2 -- kept as-is because the saved models were trained on
        # these labels, but this looks like an off-by-one.
        def cat_cloud(cloudcover):
            if cloudcover < 25:
                return 0
            elif 25 <= cloudcover < 50:
                return 1
            elif 50 <= cloudcover < 75:
                return 1
            else:
                return 3

        monthly_averages = np.around(mean_data(monthly_weather_data), 2)
        monthly_averages[2] = cat_heat(monthly_averages[2])
        monthly_averages[3] = cat_cloud(monthly_averages[3])

        return monthly_averages

    monthly_weather_data = final_weather(monthly_weather_data)

    print(monthly_weather_data)

    final_data = pd.read_csv("GROUP_OF_ITEMS_FINAL/" + group.upper() + ".csv")
    final_data = final_data.drop(["Unnamed: 0"], axis=1)

    final_data["ishol/week"] = 9
    final_data["group"] = group
    final_data["monthly_Avgtemp"] = monthly_weather_data[0]
    final_data["monthly_avg_FeelsLikeC"] = monthly_weather_data[1]
    final_data["monthly_avg_HeatIndexC"] = monthly_weather_data[2]
    final_data["monthly_avg_cloudcover"] = monthly_weather_data[3]
    final_data["monthly_avg_humidity"] = monthly_weather_data[4]

    # Pick the model trained for this product group. The original used
    # `group == "A" or "B" or ...`, which is always truthy, so every request
    # was served by the first (gradient-boosting) model.
    gb_groups = {"ALCOHOL", "KETCH_CONCETRATE_MUSTARD_MAJO_HORSERADISH",
                 "SPICES", "GENERAL", "BREAD", "CHEWING_GUM_LOLIPOPS",
                 "GENERAL_FOOD"}
    xgb_groups = {"COFFEE TEA", "CIGARETTES", "CHIPS_FLAKES",
                  "ICE_CREAMS_FROZEN", "POULTRY", "SWEETS"}
    adab_groups = {"GROATS_RICE_PASTA", "OCCASIONAL"}
    rf_groups = {"CHEMISTRY", "GENERAL_ITEMS", "VEGETABLES"}

    if group in gb_groups:
        loaded_model = load_model("MODELS/gb")
    elif group in xgb_groups:
        loaded_model = load_model("MODELS/extreme_gb")
    elif group in adab_groups:
        loaded_model = load_model("MODELS/adab")
    elif group in rf_groups:
        loaded_model = load_model("MODELS/rf")
    elif group == "DAIRY_CHESSE":
        loaded_model = load_model("MODELS/catb")
    else:
        # Unknown group: fall back to the general model, matching the
        # original's effective behaviour of always loading MODELS/gb.
        loaded_model = load_model("MODELS/gb")

    pred = predict_model(loaded_model, data=final_data)

    # Apply exp to the predicted label and round (inverse of a log transform,
    # presumably -- confirm against the training pipeline).
    final_sales = np.round(np.exp(pred["Label"]), 0)
    pred['Label'] = final_sales

    s = ""
    for index, row in pred.iterrows():
        s += 'Quantity of product {} predicted is  {}.'.format(
            row['name'], row['Label'])
        s += "\n\n\n"

    print(s)
    return render_template('index.html', prediction_text=s)
# --- Example 18 ---
def predict_api():
    '''
    For direct API calls through request.

    Expects a JSON payload with a "Date" field like "02-2020" (the month and
    year to predict for), pulls that month's weather for Mlawa, derives the
    monthly weather features and returns the demo model's predictions.
    '''
    import calendar

    data = request.get_json(force=True)

    # get_json returns a dict; the original's attribute access (data.Date)
    # raised AttributeError.
    date = data["Date"]  # e.g. "02-2020" (FEB 2020)

    converted_datetime = pd.to_datetime(date).date()

    def dateaddition(converted_datetime):
        """Number of days in the month of *converted_datetime*.

        calendar.monthrange replaces the hand-rolled parity logic, which
        returned 30 for July and used the incomplete divisible-by-4 leap rule.
        """
        return calendar.monthrange(converted_datetime.year,
                                   converted_datetime.month)[1]

    end_date = pd.to_datetime(converted_datetime +
                              timedelta(dateaddition(converted_datetime) -
                                        1)).date().strftime("%d-%b-%Y")
    start_date = converted_datetime.strftime("%d-%b-%Y")

    print(start_date, "   ", end_date)

    frequency = 24
    api_key = '12b2c18a34194a8ca93113127200405'
    location_list = ['Mlawa']
    hist_weather_data = retrieve_hist_data(api_key,
                                           location_list,
                                           start_date,
                                           end_date,
                                           frequency,
                                           location_label=False,
                                           export_csv=True,
                                           store_df=True)

    monthly_weather_data = pd.read_csv("Month_Weather_data.csv")

    # Keep only the columns the model features are derived from.
    monthly_weather_data = monthly_weather_data.drop([
        "date_time", "totalSnow_cm", "sunHour", "uvIndex.1", "uvIndex",
        "moon_illumination", "moonrise", "moonset", "sunrise", "DewPointC",
        "sunset", "WindChillC", "WindGustKmph", "precipMM", "pressure",
        "visibility", "winddirDegree", "windspeedKmph", "tempC"
    ],
                                                     axis=1)

    monthly_weather_data["avg_temp"] = (monthly_weather_data["maxtempC"] +
                                        monthly_weather_data["mintempC"]) / 2
    monthly_weather_data = monthly_weather_data.drop(["maxtempC", "mintempC"],
                                                     axis=1)
    monthly_weather_data = monthly_weather_data[[
        "avg_temp",
        "FeelsLikeC",
        "HeatIndexC",
        "cloudcover",
        "humidity",
    ]]

    # monthly_weather_data=monthly_weather_data[["avg_temp","FeelsLikeC","HeatIndexC","cloudcover","humidity","ishol/week"]]

    def cat_heat(heatindex):
        """Bucket mean heat index into 3 categories."""
        if heatindex < -2:
            return 0
        elif -1 <= heatindex <= 14:
            return 1
        else:
            return 2

    def cat_cloud(cloudcover):
        """Bucket mean cloud cover.

        NOTE(review): 25-50 and 50-75 both map to 1 and nothing maps to 2 --
        kept as-is since the saved model was trained with these labels.
        """
        if cloudcover < 25:
            return 0
        elif 25 <= cloudcover < 50:
            return 1
        elif 50 <= cloudcover < 75:
            return 1
        else:
            return 3

    def mean_data(data):
        """Column-wise means, in column order.

        The original returned from *inside* the loop, so only the first
        column's mean was ever produced (which then crashed the
        monthly_averages[2] indexing below); .items() also replaces the
        deprecated iteritems().
        """
        return [column.mean() for _, column in data.items()]

    monthly_averages = np.around(mean_data(monthly_weather_data), 2)
    monthly_averages[2] = cat_heat(monthly_averages[2])
    monthly_averages[3] = cat_cloud(monthly_averages[3])

    print(monthly_averages)

    loaded_model = load_model("Final_Mod")
    unseen_data = pd.read_csv("GROUP_OF_DATASETS/SWEETS.csv")

    prediction = predict_model(loaded_model, data=unseen_data.head(5))

    # NOTE(review): jsonify on a predict_model result is only valid if it is
    # JSON-serialisable -- if it is a DataFrame, convert via .to_dict() first.
    print(jsonify(prediction))
    return jsonify(prediction)
# --- Example 19 ---
 def getHistoricalData(location):
     """Download historical weather for a single *location* using the
     module-level api_key, date range and frequency; the data is written to
     '<location>.csv' (store_df=False, so nothing is returned)."""
     retrieve_hist_data(api_key, [location],
                        start_date, end_date, frequency,
                        location_label=False,
                        export_csv=True,
                        store_df=False)
# ignore all future warnings
simplefilter(action='ignore')

app = Flask(__name__, template_folder='template')
model = pickle.load(open('models/model.pkl', 'rb'))

START_DATE = '15-Nov-2019'
END_DATE = '15-FEB-2020'
API_KEY = 'b1ec70731e66454d9ec62250201602'
LOCATION_LIST = ['pune']
FREQUENCY = 24  # hours between samples (daily)

# The original call omitted the positional `frequency` argument, which every
# other retrieve_hist_data call site passes as required -- without it this
# raised a TypeError at import time.
hist_weather_data = retrieve_hist_data(API_KEY,
                                       LOCATION_LIST,
                                       START_DATE,
                                       END_DATE,
                                       FREQUENCY,
                                       location_label=False,
                                       export_csv=True,
                                       store_df=True)


@app.route('/')
def app_status():
    """Health-check endpoint: confirms the service is reachable."""
    status_message = "Application is up and running."
    return status_message


@app.route('/form')
def show_form():
    """Serve the input form page."""
    template_name = 'form.html'
    return render_template(template_name)

def main():
    """Rebuild and upload the JGH daily-visits Prophet model.

    Pipeline (all side effects, nothing returned):
      1. Connect to Dropbox and pull the daily-visits CSV, stat days,
         Canadiens games and the cached Montreal daily weather.
      2. Top up the weather cache via the WWO API and push it back to Dropbox.
      3. Fit a Prophet model with holiday/hockey effects and weather
         regressors, pickle it and upload it to Dropbox.
    """

    dropbox_api_key = os.environ['DROPBOX_ED_API']
    dbx = dropbox.Dropbox(dropbox_api_key)
    dbx.users_get_current_account()  # raises if the token is invalid
    print('connected to dropbox')

    # Daily ED visit counts (Prophet's 'ds'/'y' layout).
    df = pd.read_csv(
        'https://www.dropbox.com/s/keafvwlkboedkdm/jghDailyVisits.csv?dl=1')
    df.ds = pd.to_datetime(df.ds)
    print('jgh daily visits loaded')

    stat_days_df = pd.read_csv(
        'https://www.dropbox.com/s/hj3byufwtypi8d3/statdays.csv?dl=1')

    # Build Prophet 'holidays' frames from the RAMQ and JGH stat-day columns.
    ramq_stat_days = stat_days_df['RAMQ']
    ramq_stat_days = ramq_stat_days.dropna()
    jgh_stat_days = stat_days_df['JGH']
    jgh_stat_days = jgh_stat_days.dropna()
    ramq_stat_days = pd.DataFrame({
        'holiday': 'ramq',
        'ds': ramq_stat_days.to_list(),
        'lower_window': 0,
        'upper_window': 1,
    })
    jgh_stat_days = pd.DataFrame({
        'holiday': 'jgh',
        'ds': jgh_stat_days.to_list(),
        'lower_window': 0,
        'upper_window': 1,
    })
    # NOTE(review): this assignment is dead -- it is overwritten by the
    # four-way concat after the hockey frames are built below.
    holidays = pd.concat((ramq_stat_days, jgh_stat_days))
    print('stat days prepared')

    # Get all Montreal Canadiens games in our time frame (2012-present) from the NHL's REST API
    hockey_df = pd.read_csv(
        'https://www.dropbox.com/s/g00ih64ndksshwr/montrealHockey.csv?dl=1')
    hockey_df.dateTime = pd.to_datetime(hockey_df.dateTime)

    # Separate the games by Home or Away
    home_hockey_df = hockey_df[hockey_df['homeTeam'] == 'Montreal Canadiens']

    away_hockey_df = hockey_df[hockey_df['awayTeam'] == "Montreal Canadiens"]

    # Format them as Prophet expects
    home_hockey = pd.DataFrame({
        'holiday': 'homeHockey',
        'ds': pd.to_datetime(home_hockey_df['dateTime']),
        'lower_window': 0,
        'upper_window': 1,
    })

    away_hockey = pd.DataFrame({
        'holiday': 'awayHockey',
        'ds': pd.to_datetime(away_hockey_df['dateTime']),
        'lower_window': 0,
        'upper_window': 1,
    })

    # Add the hockey games to our stat day data
    holidays = pd.concat(
        (home_hockey, away_hockey, ramq_stat_days, jgh_stat_days))
    print('hockey games added')

    # Load our weather data from Dropbox
    weather_df = pd.read_csv(
        'https://www.dropbox.com/s/688u8aw6k0eqzqb/montrealDailyWeather.csv?dl=1')
    weather_df['ds'] = pd.to_datetime(weather_df['ds'])
    print('old weather added')

    # Fetch whicher days are missing, and 2 days into the future (weather forecast), which is the maximum the WWO API let's us access through the this API
    # The Data is saved as a csv file called 'Montreal'
    frequency = 24
    # Start 2 days before the last cached day so the overlap can be
    # de-duplicated below; end at today's date in Eastern time.
    start_date = (weather_df.ds.max()-timedelta(days=2)
                  ).date().strftime("%d-%b-%Y").upper()
    end_date = (datetime.now(pytz.utc)).astimezone(
        pytz.timezone('US/Eastern')).date().strftime("%d-%b-%Y").upper()
    weather_api_key = os.environ['WEATHER_API']
    api_key = weather_api_key
    location_list = ['Montreal']
    retrieve_hist_data(api_key,
                       location_list,
                       start_date,
                       end_date,
                       frequency,
                       location_label=False,
                       export_csv=True,
                       store_df=False)

    missing_weather_df = pd.read_csv('Montreal.csv')
    missing_weather_df['date_time'] = pd.to_datetime(
        missing_weather_df['date_time'])
    missing_weather_df = missing_weather_df.drop(
        ['moonrise', 'moonset', 'sunrise', 'sunset'], axis=1)
    missing_weather_df = missing_weather_df.rename(columns={"date_time": "ds"})
    print('new weather fetched')

    # Concatenate the old weather data with the missing weather data
    final_weather_df = pd.concat([weather_df, missing_weather_df])

    # keep="last" prefers the freshly fetched rows in the 2-day overlap
    final_weather_df = final_weather_df.drop_duplicates(
        subset='ds', keep="last")

    final_weather_df.to_csv('montrealDailyWeather.csv', index=False)

    # Push the refreshed weather cache back to Dropbox.
    upload(dbx, 'montrealDailyWeather.csv', '', '',
           'montrealDailyWeather.csv', overwrite=True)

    # Every weather column except the date becomes a Prophet regressor.
    regressors = final_weather_df.columns.to_list()
    regressors.remove('ds')

    df.ds = pd.to_datetime(df.ds)
    final_df = pd.merge(df, final_weather_df, on='ds')

    print('starting to build model')
    # Instantiate our model with our stat day and hockey variables saved in the 'holidays' dataframe
    m = Prophet(holidays=holidays, seasonality_mode='multiplicative',
                changepoint_prior_scale=0.5, changepoint_range=0.85)
    # Add Prophet's built-in holidays for Canada
    m.add_country_holidays(country_name='CA')
    # Add our 20 weather variables as regressors
    for regressor in regressors:
        m.add_regressor(regressor)
    # Fit our model to our data
    # m.fit(final_df)
    # Fit our model to data from before 2019
    m.fit(final_df)
    print('finished fitting model')

    # NOTE(review): pickle appears to already be imported at module level
    # (it is used above this function) -- this local import looks redundant;
    # confirm against the file's import block.
    import pickle
    pkl_path = "jgh-prophet-daily.pkl"
    with open(pkl_path, "wb") as f:
        # Pickle the 'Prophet' model using the highest protocol available.
        pickle.dump(m, f)

    upload(dbx, 'jgh-prophet-daily.pkl', '', '',
           'jgh-prophet-daily.pkl', overwrite=True)
def main() -> None:
    """Refresh the hourly Montreal weather cache, retrain the JGH occupancy
    Prophet model, and push both artifacts back to Dropbox.

    Side effects: reads/writes CSVs in the current working directory, calls
    the Dropbox and World Weather Online APIs, and uploads
    'montrealHourlyWeather.csv' and 'jgh-prophet-occupancy.pkl' to Dropbox.

    Environment variables required: DROPBOX_ED_API, WEATHER_API.
    """

    # Authenticate against Dropbox; users_get_current_account() fails fast
    # if the token is invalid, before any work is done.
    dropbox_api_key = os.environ['DROPBOX_ED_API']
    dbx = dropbox.Dropbox(dropbox_api_key)
    dbx.users_get_current_account()
    print('connected to dropbox')

    # Hospital occupancy time series; 'ds' is Prophet's expected
    # timestamp column name.
    df = pd.read_csv(
        'https://www.dropbox.com/s/fqsdx1ovqsljwqa/jghOccupancy.csv?dl=1')
    df.ds = pd.to_datetime(df.ds)
    print('jgh occupancy loaded')

    # NOTE(review): stat_days_df is loaded but never used below — confirm
    # whether it was meant to feed a 'holidays' argument to Prophet.
    stat_days_df = pd.read_csv(
        'https://www.dropbox.com/s/hj3byufwtypi8d3/statdays.csv?dl=1')

    # Load our cached weather data from Dropbox
    weather_df = pd.read_csv(
        'https://www.dropbox.com/s/omkh9t1lrg5k914/montrealHourlyWeather.csv?dl=1'
    )
    weather_df['ds'] = pd.to_datetime(weather_df['ds'])
    print('old weather added')

    # Fetch the days missing from the cache, and up to 2 days into the future
    # (weather forecast), which is the maximum the WWO API lets us access.
    # The data is saved by retrieve_hist_data as a CSV file named after the
    # location ('Montreal.csv').
    frequency = 1
    # Start 2 days before the newest cached timestamp so revised observations
    # re-fetch; overlapping rows are deduplicated below with keep="last".
    start_date = (weather_df.ds.max() -
                  timedelta(days=2)).date().strftime("%d-%b-%Y").upper()
    # "Today" in Montreal's local timezone rather than UTC.
    end_date = (datetime.now(pytz.utc)).astimezone(
        pytz.timezone('US/Eastern')).date().strftime("%d-%b-%Y").upper()
    weather_api_key = os.environ['WEATHER_API']
    api_key = weather_api_key
    location_list = ['Montreal']
    retrieve_hist_data(api_key,
                       location_list,
                       start_date,
                       end_date,
                       frequency,
                       location_label=False,
                       export_csv=True,
                       store_df=False)

    # Read back the CSV retrieve_hist_data just wrote.
    missing_weather_df = pd.read_csv('Montreal.csv')
    missing_weather_df['date_time'] = pd.to_datetime(
        missing_weather_df['date_time'])
    # Drop non-numeric astronomical columns that cannot serve as regressors.
    missing_weather_df = missing_weather_df.drop(
        ['moonrise', 'moonset', 'sunrise', 'sunset'], axis=1)
    # Align the timestamp column name with the cached data / Prophet.
    missing_weather_df = missing_weather_df.rename(columns={"date_time": "ds"})
    print('new weather fetched')

    # Concatenate the old weather data with the missing weather data;
    # keep="last" prefers the freshly fetched rows on overlapping timestamps.
    final_weather_df = pd.concat([weather_df, missing_weather_df])
    final_weather_df = final_weather_df.drop_duplicates(subset='ds',
                                                        keep="last")

    # Persist the refreshed cache locally, then push it back to Dropbox.
    final_weather_df.to_csv('montrealHourlyWeather.csv', index=False)

    upload(dbx,
           'montrealHourlyWeather.csv',
           '',
           '',
           'montrealHourlyWeather.csv',
           overwrite=True)

    # Every weather column except the timestamp becomes a Prophet regressor.
    regressors = final_weather_df.columns.to_list()
    regressors.remove('ds')

    df.ds = pd.to_datetime(df.ds)
    # Inner join: train only on timestamps that have both occupancy and weather.
    final_df = pd.merge(df, final_weather_df, on='ds')

    print('starting to build model')
    # NOTE(review): unlike the daily variant earlier in this file, no
    # holidays= dataframe is passed here — confirm that is intentional.
    m = Prophet(seasonality_mode='multiplicative',
                changepoint_prior_scale=0.5,
                changepoint_range=0.85)
    # Add Prophet's built-in holidays for Canada
    m.add_country_holidays(country_name='CA')
    # Add each weather variable as a regressor
    for regressor in regressors:
        m.add_regressor(regressor)
    # Fit our model to our data
    # m.fit(final_df)
    # Fit our model to data from before 2019
    m.fit(final_df)
    print('finished fitting model')

    # Local import: pickle is only needed for serializing the model here.
    import pickle
    pkl_path = "jgh-prophet-occupancy.pkl"
    with open(pkl_path, "wb") as f:
        # Serialize the fitted Prophet model (default pickle protocol).
        pickle.dump(m, f)

    # Push the serialized model to Dropbox.
    upload(dbx,
           'jgh-prophet-occupancy.pkl',
           '',
           '',
           'jgh-prophet-occupancy.pkl',
           overwrite=True)
Ejemplo n.º 23
0
# --- Script body: validate CLI zip codes, label them, fetch weather ---

# Confirm with the user before firing off a large number of API requests.
if len(sys.argv[1:]) > 3:
    sure = input(
        "You have entered more than three zip codes, are you certain you'd like to proceed? y/n: "
    )
    if sure.lower() in ("n", "no"):
        sys.exit()

# A US zip code is exactly 5 characters; reject the whole run otherwise.
# (Was: a manual count loop, an f-string with no placeholders, a message
# claiming "len < 5" despite the != 5 check, and an unreachable
# sys.exit(5) after the raise.)
if any(len(code) != 5 for code in locations):
    raise ValueError(
        "Not a valid zip code; locations array contains value whose length != 5")

# Record a human-readable label for each zip code; 'with' guarantees the
# file is closed even if input() raises (the original leaked the handle).
with open("lexicon.txt", "a") as f:
    for code in locations:
        input_ = input(f"{code} : location ")
        f.write(f"\n{code} : {input_}")

# Fetch historical weather for every zip code; wwo_hist writes one CSV per
# location (export_csv=True) and returns the DataFrames (store_df=True).
hist_weather_data = retrieve_hist_data(WEATHER_KEY,
                                       location_list=locations,
                                       start_date=start_date,
                                       end_date=end_date,
                                       frequency=frequency,
                                       location_label=False,
                                       export_csv=True,
                                       store_df=True)
Ejemplo n.º 24
0
def retrieve_data(config=config):
    """Build a joined COVID-19 + historical-weather dataset and persist it.

    Downloads the JHU CSSE global deaths/cases/recoveries time series, derives
    daily new-case/new-death counts and active cases, fetches any missing
    weather observations per (Lat, Long) from World Weather Online (with a
    local CSV cache), and writes the joined result to
    '<data_dir>/prepped_data.csv' and '<data_dir>/prepped_data.pkl'.

    Args:
        config (dict): expects keys 'use_local', 'data_dir', 'index_col',
            'weather_api_key'.

    NOTE(review): the default binds the module-level `config` object at
    import time — later reassignments of the global are not picked up.
    """
    # Download and reshape the three JHU CSSE time series into long format.
    deaths_df = get_melt_clean(
        url=
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
        value_name='deaths',
        use_local=config['use_local'],
        data_dir=config['data_dir'])
    cases_df = get_melt_clean(
        url=
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv',
        value_name='cases',
        use_local=config['use_local'],
        data_dir=config['data_dir'])
    recovered_df = get_melt_clean(
        url=
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv',
        value_name='recoveries',
        use_local=config['use_local'],
        data_dir=config['data_dir'])

    # Join the three series on the configured index columns.
    join_cols = config['index_col']
    full_df = pd.merge(cases_df, deaths_df, on=join_cols)
    full_df = pd.merge(full_df, recovered_df, on=join_cols)

    # countries with at least 100 cases or 1 death
    filtered_df = filter_provinces(full_df, 1, 50)
    # Daily deltas; clip(lower=0) discards negative values caused by
    # retroactive data corrections in the source series.
    filtered_df['new_deaths'] = filtered_df.groupby(
        ['Country/Region', 'Province/State'])['deaths'].diff().clip(lower=0)
    filtered_df['new_cases'] = filtered_df.groupby(
        ['Country/Region', 'Province/State'])['cases'].diff().clip(lower=0)

    # calculate active cases
    filtered_df['active_cases'] = filtered_df['cases'] - filtered_df[
        'deaths'] - filtered_df['recoveries']
    filtered_df['new_cases_as_percent_of_active'] = filtered_df[
        'new_cases'] / filtered_df['active_cases']

    # filtered_df = add_dates_to_index(filtered_df, -14, join_cols)
    # Keep only numeric columns; NaNs from diff()/division become 0.
    filtered_df = filtered_df.set_index(join_cols).select_dtypes(
        include='number').fillna(0)

    # chang index to lat/long and date for use with weather
    filtered_df.reset_index(['Country/Region', 'Province/State'], inplace=True)

    # load local weather cache and check of missing location/dates
    try:
        hist_weather_df_cache = pd.read_csv(config['data_dir'] +
                                            '/wwo_cache.csv')
        hist_weather_df_cache['date'] = pd.to_datetime(
            hist_weather_df_cache['date'])
        hist_weather_df_cache = hist_weather_df_cache.set_index(
            ['Lat', 'Long', 'date'])
        # A duplicate index would silently corrupt the join below.
        assert hist_weather_df_cache.index.is_unique

    except FileNotFoundError:
        # First run: empty cache, everything counts as missing.
        hist_weather_df_cache = pd.DataFrame()

    # get unique missing lat/long date tuples
    missing_indexes = filtered_df.index.difference(hist_weather_df_cache.index)

    if len(missing_indexes) > 0:
        # Collapse missing dates to one (start, end) range per coordinate so
        # each location needs only a single API request.
        missing_val_df = filtered_df.loc[missing_indexes]
        missing_val_agg = missing_val_df.reset_index().groupby(
            ['Lat', 'Long']).aggregate(start_date=('date', 'min'),
                                       end_date=('date', 'max')).reset_index()
        # WWO accepts "lat,long" strings as locations.
        missing_val_agg['lat_long_string'] = [
            str(x) + ',' + str(y)
            for x, y in zip(missing_val_agg['Lat'], missing_val_agg['Long'])
        ]

        # retrieve missing weather data

        hist_weather_list = []

        # Fetch per location; on any network/quota failure, stop and save
        # whatever was retrieved so far rather than losing it.
        for row in missing_val_agg.itertuples():
            try:
                # NOTE(review): response_cache_path='woo_cache' vs the
                # 'wwo_cache.csv' file name above — confirm the spelling
                # difference is intentional.
                hist_weather_list_loc = retrieve_hist_data(
                    api_key=config['weather_api_key'],
                    location_list=[row.lat_long_string],
                    start_date=row.start_date,
                    end_date=row.end_date,
                    frequency=24,
                    location_label=False,
                    export_csv=False,
                    store_df=True,
                    response_cache_path='woo_cache')

                hist_weather_list.extend(hist_weather_list_loc)
            except requests.HTTPError:
                print(
                    "exceded daily request limit, saving retrieved and exiting"
                )
                break
            except socket.timeout:
                print("timed out, saving retrieved and exiting")
                break
            except urllib.error.URLError:
                print('network issue, saving retrieved and exiting')
                break
        # TODO: need to check cached responses even if there's a current connection failure. There's some mixed up dependencies though. Caches are checked inside wwo_hist, but this is feeding requests one by one
        if len(hist_weather_list) > 0:
            # Split the "lat,long" location string back into numeric columns
            # and merge the new rows into the on-disk cache.
            hist_weather_df_new = pd.concat(hist_weather_list)
            hist_weather_df_new['Lat'] = hist_weather_df_new['location'].apply(
                lambda x: x.split(',')[0]).astype('float')
            hist_weather_df_new['Long'] = hist_weather_df_new[
                'location'].apply(lambda x: x.split(',')[1]).astype('float')
            hist_weather_df_new.rename(columns={'date_time': 'date'},
                                       inplace=True)
            hist_weather_df_new.set_index(['Lat', 'Long', 'date'],
                                          inplace=True)
            # verify_integrity raises if new rows duplicate cached ones.
            hist_weather_df = pd.concat(
                [hist_weather_df_cache, hist_weather_df_new],
                verify_integrity=True)
            hist_weather_df.to_csv(config['data_dir'] + '/wwo_cache.csv')
        else:
            print("unable to retrieve any new weather data.")
            hist_weather_df = hist_weather_df_cache

        # NOTE(review): this join/save block is inside the
        # `if len(missing_indexes) > 0` branch — when the cache is already
        # complete, prepped_data.{csv,pkl} are never regenerated. Confirm
        # whether these lines should be dedented.
        hist_weather_df = hist_weather_df.infer_objects()
        joined_df = filtered_df.join(hist_weather_df, how='inner')
        joined_df.to_csv(config['data_dir'] + '/prepped_data.csv')
        joined_df.to_pickle(config['data_dir'] + '/prepped_data.pkl')
Ejemplo n.º 25
0
def _predict_feature(data, column):
    """Fit a linear regression predicting *column* from every other column
    in *data* and return the prediction on a held-out 1% test split."""
    X = data.drop([column], axis=1)
    Y = data[column].values.reshape(-1, 1)
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.01)
    model = lm().fit(x_train, y_train)
    return model.predict(x_test)


def forecast(city_name):
    """Download a year of historical weather for *city_name*, print current
    conditions from the weatherstack API, and return linear-regression
    predictions for temperature, pressure and humidity.

    Args:
        city_name (str): city to query, e.g. 'London'.

    Returns:
        tuple: (temp, pressure, hum) — each a stringified numpy array of
        predictions on the held-out test split.

    Side effects: changes the process working directory, writes/overwrites
    '<city_name>.csv' on the Desktop, and calls two external weather APIs.
    """
    os.chdir(r"C:\Users\Administrator\Desktop")
    frequency = 3
    start_date = '1-JAN-2019'
    end_date = '1-JAN-2020'
    # NOTE(review): API keys are hard-coded; consider environment variables.
    api_key = 'e60a5f5f96574a33947210842201502'
    #city_name = input('Enter city name: ')
    location_list = [city_name]
    # Writes '<city_name>.csv' to the working directory (export_csv=True).
    hist_weather_data = retrieve_hist_data(api_key,
                                           location_list,
                                           start_date,
                                           end_date,
                                           frequency,
                                           location_label=False,
                                           export_csv=True,
                                           store_df=True)
    path = "C:\\Users\\Administrator\\Desktop\\"

    data = pd.read_csv(path + city_name + ".csv")

    # drop or delete the unnecessary columns in the data.
    data = data.drop([
        "date_time", 'maxtempC', 'DewPointC', 'mintempC', 'sunHour',
        'moon_illumination', 'moonrise', 'moonset', 'sunrise', 'sunset',
        'HeatIndexC', 'WindChillC', 'WindGustKmph', 'totalSnow_cm'
    ],
                     axis=1,
                     inplace=False)

    # NOTE(review): to_csv without index=False adds an 'Unnamed: 0' column
    # when the file is re-read below — confirm whether that is intended.
    data.to_csv(city_name + '.csv')

    params = {
        'access_key': '7f31a3c1baed8dddc5b06a0448f4b534',
        'query': city_name
    }

    # Current conditions from weatherstack (separate from the WWO history).
    api_result = requests.get('http://api.weatherstack.com/current', params)
    api_response = api_result.json()
    print('\n')
    print(u'Given City Name: %s' % (api_response['location']['name']))
    #a=api_response['location']['name']
    #these variables a to k can be returned to get the current details
    print(u'Current temperature is %d℃' %
          (api_response['current']['temperature']))
    a = api_response['current']['temperature']
    print(u'Current Humidity is %d' % (api_response['current']['humidity']))
    b = api_response['current']['humidity']
    print(u'Current Pressure is %d Pascal' %
          (api_response['current']['pressure']))
    c = api_response['current']['pressure']
    print(u'Current Cloud Cover is %d' %
          (api_response['current']['cloudcover']))
    d = api_response['current']['cloudcover']
    print(u'Current Precipitation is %d' % (api_response['current']['precip']))
    e = api_response['current']['precip']
    print(u'Current Visibility is %d' %
          (api_response['current']['visibility']))
    f = api_response['current']['visibility']
    print(u'Current Wind Speed is %d' %
          (api_response['current']['wind_speed']))
    g = api_response['current']['wind_speed']
    print(u'Current Feels Like is %d' % (api_response['current']['feelslike']))
    h = api_response['current']['feelslike']
    print(u'Current Wind Direction is %s' %
          (api_response['current']['wind_dir']))
    # BUG FIX: was api_response['current']['wind_arr'] — weatherstack's
    # field is 'wind_dir'; the old key raised KeyError at runtime.
    i = api_response['current']['wind_dir']
    print(u'Current UV Index is %d' % (api_response['current']['uv_index']))
    j = api_response['current']['uv_index']
    print(u'Current Wind Degree is %d' %
          (api_response['current']['wind_degree']))
    k = api_response['current']['wind_degree']

    # Re-read the cleaned CSV and predict each target with the shared helper
    # (replaces three copy-pasted regression stanzas).
    path = "C:\\Users\\Administrator\\Desktop\\"
    data = pd.read_csv(path + city_name + ".csv")

    pressure = _predict_feature(data, 'pressure')
    print(pressure, 'This is the pressure in pascal for the input')

    temp = _predict_feature(data, 'tempC')
    print(temp, 'This is the temperature in degrees C for the input')

    hum = _predict_feature(data, 'humidity')
    print(hum, 'This is the humidity for the input')

    return str(temp), str(pressure), str(hum)
Ejemplo n.º 26
0
from wwo_hist import retrieve_hist_data
import pandas as ps
import plotly.express as px

# World Weather Online credentials and query window for Beirut.
API_KEY = ''
location_list = ['beirut']
start_date = '01-JAN-2009'
end_date = '10-OCT-2019'
freq = 24

# Pull the full daily history: export_csv=True writes one CSV per location,
# store_df=True also returns the per-location DataFrames.
data = retrieve_hist_data(
    API_KEY,
    location_list,
    start_date,
    end_date,
    freq,
    location_label=False,
    export_csv=True,
    store_df=True,
)