コード例 #1
0
def test(csv_data,
         train_size,
         test_size,
         data_col,
         time_col,
         seq,
         future,
         result_file=None,
         show=0):
    """
    Evaluate the model on the test window and plot predicted against actual values.

    Rows ``train_size : train_size + test_size + 1`` of ``csv_data.data`` are read from column ``data_col``.
    The model input is that series without its last element and the target is the same series shifted by one
    step. The model is also asked for ``future`` extra steps, but those trailing steps are dropped before the
    loss is computed and before plotting. MSELoss is used as the criterion.

    :param csv_data: object exposing a ``data`` dataframe that holds the series (CSVFileManager)
    :param train_size: number of leading rows that belong to the training split (start offset for iloc)
    :param test_size: number of rows in the test split
    :param data_col: positional index of the target data column in csv_data.data
    :param time_col: positional index of the timestamp column in csv_data.data
    :param seq: model; called as ``seq(input, future=future)`` and must expose a ``device`` attribute
    :param future: number of extra steps requested from the model, must be smaller than test_size
    :param result_file: complete file path handed to DataVisualizer.forecast for storing the result
    :param show: whether to show the graph, **NOTE**: the graph has to be closed for execution to continue
    :return: tuple ``(mape, loss)`` - the value returned by calc_mape and the MSE loss as a python float
    :raises RequirementNotSatisfied: if future >= test_size
    """
    if future >= test_size:
        raise RequirementNotSatisfied
    test_data = csv_data.data.iloc[train_size:train_size + test_size + 1,
                                   data_col].reset_index(drop=True)
    test_visualize = pd.DataFrame(csv_data.data.iloc[train_size:train_size +
                                                     test_size, time_col],
                                  columns=['timestamp'])
    test_visualize.reset_index(drop=True, inplace=True)
    # The whole window is fed as one sequence. Letting numpy infer the length works both when the extra
    # (+1) row exists and when the dataframe ends exactly at train_size + test_size; the previous
    # hard-coded ``test_size - 1`` only handled the latter.
    test_iput = torch.from_numpy(test_data[:-1].values.reshape(1, -1))
    test_target = torch.from_numpy(test_data[1:].values.reshape(1, -1))
    test_iput = test_iput.to(seq.device)
    test_target = test_target.to(seq.device)
    criteria = nn.MSELoss()
    with torch.no_grad():
        pred = seq(test_iput, future=future)
        # assumes pred is (batch, steps) with the ``future`` extrapolated steps last - TODO confirm.
        # An explicit length is used instead of ``[:-future]`` because ``[:-0]`` would be an empty slice.
        hist_len = max(pred.shape[1] - future, 0)
        pred_hist = pred[:, :hist_len]
        l_test = criteria(pred_hist, test_target)
        loss = l_test.item()
        print('test loss:', loss)
    # NOTE(review): calc_mape receives the full test_data (including its first element), not the shifted
    # target - confirm this is what calc_mape expects.
    mape = calc_mape(pd.DataFrame(pred_hist.cpu().numpy()), test_data)
    print("Weighted mean absolute error is :", mape)
    pf = pd.DataFrame(torch.squeeze(pred)[:hist_len].cpu().numpy(),
                      columns=['idle'])
    pf['timestamp'] = test_visualize.iloc[:, 0]
    test_visualize['idle'] = test_data[:-1]
    actual = CSVFileManager(interval=180, df=test_visualize)
    visualizer = DataVisualizer(csv_mgr=actual, x_col='timestamp', y_col='idle')
    visualizer.forecast(compare_data=pf,
                        column_list=['timestamp', 'idle'],
                        file_path=result_file,
                        show=show)
    return mape, loss
コード例 #2
0
def forecast(seq, test_data, data_col, time_col, future, result_file=None):
    """
    Forecast the data column for ``future`` steps and plot the forecast against the actual data.

    All rows except the last ``future`` ones are fed to the model as a single sequence; the model is asked
    for ``future`` additional steps, and those steps are compared (MSELoss) with the held-back rows. The
    forecast loss is printed and the forecast is handed to DataVisualizer.forecast as ``compare_data``,
    while the DataVisualizer itself is built from the original data.

    To do: improve on the data plotting.

    :param seq: model; called as ``seq(input, future=future)`` and must expose a ``device`` attribute
    :param test_data: object exposing a ``data`` dataframe with the test series (CSVFileManager)
    :param data_col: positional index of the target data column in test_data.data
    :param time_col: positional index of the timestamp column in test_data.data
    :param future: number of steps to forecast, must be smaller than the number of rows in test_data.data
    :param result_file: result file path to save forecast
    :return: None
    :raises ValueError: if future is not smaller than the number of available rows
    """
    total_size = test_data.data.shape[0]
    if future >= total_size:
        # Without this check the failure would surface later as an opaque reshape / size-mismatch error.
        raise ValueError(
            "future ({}) must be smaller than the number of rows ({})".format(
                future, total_size))
    test_iput = test_data.data.iloc[0:(total_size - future), data_col]
    test_size = test_iput.size
    # Host copy of the actual values; it is reused for plotting so no device tensor ever reaches pandas.
    target_values = test_data.data.iloc[:, data_col].values
    test_visualize = pd.DataFrame(test_data.data.iloc[:, time_col],
                                  columns=['timestamp'])
    test_visualize.reset_index(drop=True, inplace=True)
    test_iput = torch.from_numpy(test_iput.values.reshape(-1, test_size))
    test_target = torch.from_numpy(target_values)
    test_iput = test_iput.to(seq.device)
    test_target = test_target.to(seq.device)
    criteria = nn.MSELoss()
    horizon = slice(test_size, test_size + future)
    with torch.no_grad():
        # assumes the squeezed output is 1-D with the forecast steps after the first test_size entries
        # - TODO confirm against the model.
        pred = torch.squeeze(seq(test_iput, future=future))
        l_forecast = criteria(pred[horizon], test_target[horizon])
        print('forecast loss:', l_forecast.item())
    pf1 = pd.DataFrame(pred[horizon].cpu().numpy(), columns=['idle'])
    pf1['timestamp'] = test_visualize.iloc[horizon, 0].reset_index(drop=True)
    # Assign plain numpy values: the tensor may live on seq.device (e.g. a GPU), which pandas cannot read.
    test_visualize['idle'] = target_values
    test_visualize = CSVFileManager(interval=180, df=test_visualize)
    ft = DataVisualizer(csv_mgr=test_visualize,
                        x_col='timestamp',
                        y_col='idle')
    ft.forecast(compare_data=pf1,
                column_list=['timestamp', 'idle'],
                file_path=result_file)