def test(csv_data, train_size, test_size, data_col, time_col, seq, future,
         result_file=None, show=0):
    """
    Evaluate the model on a held-out window and plot predicted vs. actual values.

    The window starts at row ``train_size`` of ``csv_data.data``. Each value is
    used to predict the next one (input = window without its last row,
    target = window without its first row). The model is also asked for
    ``future`` extra steps, but those are excluded from the loss, the error
    metric and the plot. MSELoss is used as the criterion.

    :param csv_data: CSVFileManager object whose ``data`` attribute is the dataframe
    :param train_size: number of leading rows skipped (iloc offset of the window)
    :param test_size: size of the test window for iloc
    :param data_col: # column of the target data in csv_data.data dataframe
    :param time_col: # column of the target timestamp in csv_data.data dataframe
    :param seq: trained model; called as ``seq(input, future=future)`` and
        expected to expose a ``device`` attribute
    :param future: number of future steps to be predicted, must be smaller
        than test_size
    :param result_file: a complete file path where the plot is stored after testing
    :param show: whether to show the graph, **NOTE**: requires you to close
        the graph to continue
    :raises RequirementNotSatisfied: if ``future >= test_size``
    :return: tuple ``(mape, loss)`` - the value returned by ``calc_mape`` and
        the MSE loss as a Python float
    """
    if future >= test_size:
        raise RequirementNotSatisfied

    # One extra row is requested so that input and target (shifted by one)
    # can both have test_size entries; iloc silently returns fewer rows when
    # the dataframe ends earlier.
    test_data = csv_data.data.iloc[train_size:train_size + test_size + 1, data_col]
    test_data = test_data.reset_index(drop=True)

    # to_frame(name=...) renames the column; DataFrame(series, columns=[...])
    # would instead select by label and lose the values when the source
    # column is not already called 'timestamp'.
    test_visualize = csv_data.data.iloc[train_size:train_size + test_size, time_col]
    test_visualize = test_visualize.to_frame(name='timestamp').reset_index(drop=True)

    test_iput = test_data[:-1]
    test_target = test_data[1:]

    # A single batch row whose length is inferred from the data. The previous
    # hard-coded width (test_size - 1) only matched when the slice above had
    # been truncated by the end of the dataframe.
    test_iput = torch.from_numpy(test_iput.values.reshape(1, -1)).to(seq.device)
    test_target = torch.from_numpy(test_target.values.reshape(1, -1)).to(seq.device)

    # The model appends `future` extra steps to its output; drop them before
    # comparing. `-future or None` keeps the whole output when future == 0
    # (a plain `:-0` slice would be empty).
    known = slice(None, -future or None)

    criteria = nn.MSELoss()
    with torch.no_grad():
        pred = seq(test_iput, future=future)
        l_test = criteria(pred[:, known], test_target)

    loss_value = l_test.item()
    print('test loss:', loss_value)

    # NOTE(review): the metric is computed against test_data (the whole
    # window), not test_target - confirm this is what calc_mape expects.
    mape = calc_mape(pd.DataFrame(pred[:, known].cpu().numpy()), test_data)
    print("Weighted mean absolute error is :", mape)

    pred = torch.squeeze(pred)
    pf = pd.DataFrame(pred[known].cpu().numpy(), columns=['idle'])
    pf['timestamp'] = test_visualize.iloc[:, 0]
    test_visualize['idle'] = test_data[:-1]

    # The original data is the base of the plot; predictions are overlaid
    # through the compare_data argument.
    ft = CSVFileManager(interval=180, df=test_visualize)
    ft = DataVisualizer(csv_mgr=ft, x_col='timestamp', y_col='idle')
    ft.forecast(compare_data=pf, column_list=['timestamp', 'idle'],
                file_path=result_file, show=show)
    return mape, loss_value
def pre_train(path, interval, get_by_interval):
    """
    Prepare the data manager used for training.

    Seeds the NumPy and torch random number generators with 0, builds a
    CSVFileManager for the given csv file and asks it to regroup its rows
    by ``get_by_interval``.

    :param path: Complete path of the csv file for the data
    :param interval: interval in sec by which the data rows are separated in
        the csv file
    :param get_by_interval: interval in sec by which the manager should
        separate the data rows
    :return: the initialized CSVFileManager object
    """
    # Fixed seeds for both libraries.
    torch.manual_seed(0)
    np.random.seed(0)

    manager = CSVFileManager(filename=path, interval=interval)
    manager.get_by_interval(interval=get_by_interval)
    return manager
def pre_train(path, dev, interval, get_by_interval):
    """
    Prepare the data manager used for training, restricted to one device.

    Seeds the NumPy and torch random number generators with 0, builds a
    CSVFileManager for the given csv file, keeps only the rows whose
    ``IFACE`` column equals ``dev`` and, when the requested interval differs
    from the file's interval, asks the manager to regroup its rows.

    :param path: Complete path of the csv file for the data
    :param dev: Network device whose rows are kept from the dataframe
    :param interval: interval in sec by which the data rows are separated in
        the csv file
    :param get_by_interval: interval in sec by which the manager should
        separate the data rows
    :return: the initialized CSVFileManager object
    """
    # Fixed seeds for both libraries.
    torch.manual_seed(0)
    np.random.seed(0)

    manager = CSVFileManager(filename=path, interval=interval)

    # Keep only the rows of the requested device and renumber them from 0.
    rows = manager.data
    manager.data = rows.loc[rows['IFACE'] == dev]
    manager.data.reset_index(drop=True, inplace=True)

    # Nothing to regroup when the data already has the requested interval.
    if get_by_interval == interval:
        return manager
    manager.get_by_interval(interval=get_by_interval)
    return manager
def forecast(seq, test_data, data_col, time_col, future, result_file=None):
    """
    Forecast the data column for ``future`` steps and plot it against the actual data.

    All rows except the last ``future`` ones are fed to the model, which is
    asked to continue the sequence for ``future`` steps. Those steps are
    compared with the held-back rows using MSELoss (the loss is printed) and
    plotted: a DataVisualizer is created from the original data and the
    predicted data is passed as the ``compare_data`` parameter of
    ``DataVisualizer.forecast()``.

    To do: improve on the data plotting.

    :param seq: Trained model object; called as ``seq(input, future=future)``
        and expected to expose a ``device`` attribute
    :param test_data: CSVFileManager object of test data
    :param data_col: # column in test_data.data dataframe representing target data
    :param time_col: # column in test_data.data dataframe representing target time
    :param future: # steps in the future for forecast; must satisfy
        ``0 <= future < number of rows``
    :param result_file: result file path to save forecast
    :raises ValueError: if ``future`` is negative or leaves no input rows
    :return: None
    """
    frame = test_data.data
    total_size = frame.shape[0]
    if future < 0 or future >= total_size:
        # Fail early: otherwise the slices below wrap around or become empty
        # and the error surfaces far from its cause.
        raise ValueError(
            "future must satisfy 0 <= future < number of rows "
            "(got future={}, rows={})".format(future, total_size))

    test_size = total_size - future
    horizon = slice(test_size, test_size + future)

    # Host-side copy of the target values; reused for plotting so that no
    # device tensor ends up inside a dataframe.
    actual_values = frame.iloc[:, data_col].values

    # to_frame(name=...) renames the column; DataFrame(series, columns=[...])
    # would instead select by label and lose the values when the source
    # column is not already called 'timestamp'.
    test_visualize = frame.iloc[:, time_col].to_frame(name='timestamp')
    test_visualize = test_visualize.reset_index(drop=True)

    # A single batch row holding every value before the forecast horizon.
    test_iput = torch.from_numpy(actual_values[:test_size].reshape(1, -1))
    test_iput = test_iput.to(seq.device)
    test_target = torch.from_numpy(actual_values).to(seq.device)

    criteria = nn.MSELoss()
    with torch.no_grad():
        pred = torch.squeeze(seq(test_iput, future=future))
        l_forecast = criteria(pred[horizon], test_target[horizon])
    print('forecast loss:', l_forecast.item())

    # Predicted values paired with the timestamps of the held-back rows.
    pf1 = pd.DataFrame(pred[horizon].cpu().numpy(), columns=['idle'])
    pf1['timestamp'] = test_visualize['timestamp'].iloc[horizon].reset_index(drop=True)

    test_visualize['idle'] = actual_values
    test_visualize = CSVFileManager(interval=180, df=test_visualize)
    ft = DataVisualizer(csv_mgr=test_visualize, x_col='timestamp', y_col='idle')
    ft.forecast(compare_data=pf1, column_list=['timestamp', 'idle'],
                file_path=result_file)
# NOTE(review): the statements down to the savefig call use names
# (compare_data, column_list, x_cp, show, file_path) that are not bound in
# the visible text. They look like the tail of a plotting routine whose
# header lies outside this view - confirm the enclosing definition, its
# indentation and the nesting of the two `if` statements before relying on
# this layout.
y_cp = compare_data[column_list[1]].tolist()
# Draw the comparison series in blue.
plt.plot(x_cp, y_cp, 'b')
# plt.xticks(range(len(x)), x, rotation='vertical')
if show == 1:
    plt.show()
# NOTE(review): the figure is saved after it may already have been shown,
# and `figsize` is passed to savefig - confirm both behave as intended with
# the installed matplotlib.
if file_path != None:
    plt.savefig(file_path, figsize=(10, 10), dpi=100)


# Manual smoke test: builds two small dataframes and plots the second one
# against the first through DataVisualizer.forecast.
if __name__ == "__main__":
    PATH = 'C://Users//Mahesh.Bhosale//PycharmProjects//Idle_bot//Dataset//data//IO_STAT//IO_STAT-06.csv'
    #csv_mgr = CSVFileManager(filename=PATH, interval=60)
    # Base data: column A = 0..3, column B = 'a'..'d'.
    pf = pd.DataFrame(np.arange(0, 4), columns=['A'])
    pf['B'] = ['a', 'b', 'c', 'd']
    csv_mgr = CSVFileManager(df=pf, interval=60)
    print(pf)
    #csv_mgr.get_by_interval(interval=3600)
    #print(csv_mgr.data)
    #csv_mgr.read_file()
    # Comparison data: column A = 0, 3, 6, 9 with the same labels, reduced
    # to the two middle rows.
    pf1 = pd.DataFrame(np.arange(0, 4) * 3, columns=['A'])
    pf1['B'] = ['a', 'b', 'c', 'd']
    pf1 = pf1.iloc[1:3, :]
    print(pf1)
    dv = DataVisualizer(csv_mgr=csv_mgr, x_col='B', y_col='A')
    # The plot is written to a hard-coded path on the author's machine.
    dv.forecast(
        compare_data=pf1,
        column_list=['B', 'A'],
        file_path=
        "C://Users//Mahesh.Bhosale//PycharmProjects//Idle_bot//Dataset//data//d.png"
    )