def test3_read_from_file_pandas():
    # read a TimeSeriesData2 file through the project's file I/O wrapper
    df = fio.read_from_file('../timeSeriesData/TimeSeriesData2/AtmPres2005NovMin.csv')
    print(df)
    # print only the last (value) column
    print(df.iloc[:, -1:])
    return 0
def smape(y_forecast, y_test: str):
    """
    Takes in a database (y_forecast) and a file name (y_test) and returns
    the symmetric mean absolute percentage error (SMAPE).

    Author: Nick Titzler
    """
    yf = pre.db2ts(y_forecast).iloc[:, -1].to_numpy()       # forecast values
    yt = fio.read_from_file(y_test).iloc[:, -1].to_numpy()  # observed test values
    return 100 / len(yf) * np.sum(2 * np.abs(yt - yf) / (np.abs(yf) + np.abs(yt)))
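# A minimal standalone sketch of the SMAPE formula used above, applied to
# plain numpy arrays instead of the project's database/file objects. The
# sample values and the helper name _smape_example are illustrative only.
def _smape_example():
    import numpy as np
    forecast = np.array([2.0, 3.0, 5.0])
    actual = np.array([2.5, 3.0, 4.0])
    return 100 / len(forecast) * np.sum(
        2 * np.abs(actual - forecast) / (np.abs(forecast) + np.abs(actual))
    )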
def test2_timeSeriesData2():
    fname1 = "../timeSeriesData/TimeSeriesData2/AtmPres2005NovMin.csv"
    ts = fio.read_from_file(fname1)

    fname2 = "../timeSeriesData/TimeSeriesData2/wind_aristeomercado_10m_complete.csv"
    ts = fio.read_from_file(fname2)

    fname3 = "../timeSeriesData/TimeSeriesData1/9_distribution_subsampled_train_empty.csv"
    fname4 = "../timeSeriesData/TimeSeriesData1/AtmPres2005NovMinEmpty.csv"
    fname5 = "../timeSeriesData/TimeSeriesData1/save1.p"

    # pickled time series
    ts1 = fio.read_from_file(fname5)
    print(ts1)

    # CSV with empty entries
    ts = fio.read_from_file(fname4)
    print(ts)
def mape(y_forecast, y_test: str):
    """
    Takes in a database (y_forecast) and a file name (y_test) and returns
    the mean absolute percentage error (MAPE).

    Author: Nick Titzler
    """
    yf = pre.db2ts(y_forecast).iloc[:, -1].to_numpy()       # forecast values
    yt = fio.read_from_file(y_test).iloc[:, -1].to_numpy()  # observed test values
    # percentage error is taken relative to the observed values
    return np.mean(np.abs(yf - yt) / np.abs(yt)) * 100
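# A minimal standalone sketch of the MAPE calculation, with the error taken
# relative to the observed values as in the function above. The arrays and
# the helper name _mape_example are illustrative only.
def _mape_example():
    import numpy as np
    forecast = np.array([2.0, 3.0, 5.0])
    actual = np.array([2.5, 3.0, 4.0])
    return np.mean(np.abs(forecast - actual) / np.abs(actual)) * 100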
def mse(y_forecast, y_test: str):
    """
    Takes in a database (y_forecast) and a file name (y_test) and returns
    the mean squared error between the two datasets.

    Author: Nick Titzler
    """
    yf = pre.db2ts(y_forecast).iloc[:, -1].to_numpy()       # forecast values
    yt = fio.read_from_file(y_test).iloc[:, -1].to_numpy()  # observed test values
    return mean_squared_error(yf, yt)
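# A minimal standalone check of the metric above, calling sklearn's
# mean_squared_error directly on plain numpy arrays; the values and the
# helper name _mse_example are illustrative only.
def _mse_example():
    import numpy as np
    from sklearn.metrics import mean_squared_error
    forecast = np.array([2.0, 3.0, 5.0])
    actual = np.array([2.5, 3.0, 4.0])
    return mean_squared_error(actual, forecast)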
def mlp_forecast(model_data, x_filename):
    """
    Takes in model_data, a tuple holding a trained MLPRegressor and its
    window, along with a filename string. Predicts a future set of values
    from the data in the file using the trained model and returns them as
    a numpy matrix.
    """
    # extract the trained model and its window from model_data
    model = model_data[0]
    window = model_data[1]

    # grab test data from file
    x = fio.read_from_file(x_filename)
    x = x.to_numpy()

    # predict values
    y_hat = model.predict(x)

    # interpolate predicted values back to the original size
    y_hat = mlp_output_mapper(y_hat, window)
    return y_hat
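# A hedged sketch of how a model_data tuple like the one consumed by
# mlp_forecast could be assembled. The training arrays, window value, and
# helper name _mlp_forecast_example are hypothetical; mlp_output_mapper and
# the project's file format are not reproduced here.
def _mlp_forecast_example():
    import numpy as np
    from sklearn.neural_network import MLPRegressor
    rng = np.random.default_rng(0)
    X_train = rng.random((50, 3))    # 50 samples, 3 input features
    y_train = rng.random(50)         # one target per sample
    model = MLPRegressor(max_iter=500).fit(X_train, y_train)
    model_data = (model, 5)          # (trained model, window) -- window value is hypothetical
    X_new = rng.random((10, 3))      # stand-in for data read from x_filename
    return model_data[0].predict(X_new)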
def test1_timeSeriesData1():
    """
    Test time series creation from the timeSeriesData1 files.
    """
    fileNames = ["1_temperature_test.csv", "1_temperature_train.csv",
                 "2_temperature_subsampled_test.csv", "2_temperature_subsampled_train.csv",
                 "3_passengers_test.csv", "3_passengers_train.csv",
                 "4_irradiance_test.csv", "4_irradiance_train.csv",
                 "5_irradiance_subsampled_test.csv", "5_irradiance_subsampled_train.csv",
                 "6_sunspots_test.csv", "6_sunspots_train.csv",
                 "7_distribution_subsampled_norm_test.csv", "7_distribution_subsampled_norm_train.csv",
                 "8_distribution_subsampled_test.csv", "8_distribution_subsampled_train.csv"]

    try:
        for item in fileNames:
            fname = "../timeSeriesData/TimeSeriesData1/" + item
            ts = fio.read_from_file(fname)
    except Exception as err:
        print("error in test 1:", err)
def test_execute_pipeline(test_tree):
    tree = TS_Tree()
    tree.replace_node("longest_continuous_run", 0)
    tree.add_node("impute_missing_data", 0)
    tree.add_node("assign_time", 1, data_start=1.0, increment=.2)
    tree.add_node("plot", 2)

    print("\n##Test executing a pipeline to node 3##")
    tree.print_tree()

    fname1 = "../timeSeriesData/TimeSeriesData2/AtmPres2005NovMin.csv"
    ts = fio.read_from_file(fname1)
    results = tree.execute_path(ts, 3)

    tree2 = TS_Tree()
    tree2.replace_node(
        "read_from_file", 0,
        input_filename="../timeSeriesData/TimeSeriesData2/AtmPres2005NovMin.csv"
    )
def ts2db(input_file, perc_train, perc_val, perc_test,
          input_index, output_index, output_file):
    """
    Reads a time series from input_file and splits it into training,
    validation, and test sets according to the percentages given. Each
    split is then converted into a machine-learning-friendly database
    using the input and output sizes provided, and the three databases
    are returned as a tuple.
    """
    # read in time series data from file
    ts = fio.read_from_file(input_file)

    # split time series data into training, validation, and test sets
    ts_splits = split_data(ts, perc_train, perc_val, perc_test)

    # convert the splits into databases that can be processed by
    # a machine learning model
    train_db = design_matrix(ts_splits[0], input_index, output_index)  # CHANGE OUTPUT HANDLING
    val_db = design_matrix(ts_splits[1], input_index, output_index)
    test_db = design_matrix(ts_splits[2], input_index, output_index)

    # return the set of databases
    return (train_db, val_db, test_db)
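# A hedged sketch of one common way to turn a univariate series into a
# sliding-window design matrix, assuming input_index and output_index act as
# input/output window lengths. The project's design_matrix helper is not
# shown here, so this is an assumed illustration, not its implementation.
def _design_matrix_sketch(series, n_in, n_out):
    import numpy as np
    series = np.asarray(series)
    rows_x, rows_y = [], []
    for start in range(len(series) - n_in - n_out + 1):
        rows_x.append(series[start:start + n_in])                  # input window
        rows_y.append(series[start + n_in:start + n_in + n_out])   # output window
    return np.array(rows_x), np.array(rows_y)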
def test1_write_to_file():
    ts = fio.read_from_file("../timeSeriesData/TimeSeriesData1/1_temperature_test.csv")
    fio.write_to_file(ts, "1_temperature_test_output.csv")
def test4_empty_file():
    df = fio.read_from_file('../timeSeriesData/TimeSeriesData1/oneItem.csv')
    print(df)