def test3():
    """Print the coefficients read from a fixed results file.

    Ad-hoc manual check: passes a hard-coded results path to
    run_weka.getCoefficients and writes one "<index> : <coefficient>" line
    per entry to stdout.
    """
    results_path = r'C:\dev\exercises\time_series_purchases_99_other_001_lag_05.results'
    coefficients = run_weka.getCoefficients(results_path)
    for index in range(len(coefficients)):
        # Space-joined str() of each item, i.e. the same line a print
        # statement with comma-separated items would emit.
        print(' '.join([str(index), ':', str(coefficients[index])]))
def analyzeTimeSeries(filename, max_lag, fraction_training):
    """Fit a lagged model on the start of a time series and predict all of it.

    Main function. The steps, in order:

    1. Read 'filename' with csv.readCsvFloat2 (assumed to be a CSV with a
       header row) and transpose it so that each array row is one series.
    2. Take the first fraction_training of the rows, rounded down to a
       multiple of 7 (a weekly pattern is assumed), as training data.
    3. Per series: choose the days of week to keep (getDaysOfWeekToKeep),
       build masks for the training and full ranges (getDaysOfWeekMask),
       fit a trend on the training range (getTrend) and remove it
       (removeTrend1D).
    4. Write the regression matrix with up to max_lag lags
       (timeSeriesToMatrixCsv), train with run_weka.runMLPTrain and read the
       coefficients back with run_weka.getCoefficients.
    5. Predict over the full range (predictTimeSeries) from series 0 and 1,
       add the trend of series 1 back (addTrend1D) and write the columns
       't', 'x', 'y', 'y_pred' with csv.writeCsv.

    All output files share the input's base name, with the extensions
    '.regression.csv', '.results', '.model' and '.prediction.csv'.
    Shapes and intermediate values are printed to stdout along the way.

    NOTE: steps 5 indexes series 0 and 1 explicitly, so the input needs at
    least two series.

    Args:
        filename: path of the input CSV file.
        max_lag: maximum number of lags used by the model.
        fraction_training: fraction of the rows used for training; the
            remainder is used for testing.

    Raises:
        AssertionError: if the number of training rows is not greater than
            max_lag.
    """
    def show(*items):
        # One line of space-separated str() values: the same text a print
        # statement with comma-separated items would emit.
        print(' '.join(str(item) for item in items))

    separator = '--------------------------------------------'

    # Output files live next to the input and share its base name.
    stem = os.path.splitext(filename)[0]
    regression_matrix_csv = stem + '.regression.csv'
    results_filename = stem + '.results'
    model_filename = stem + '.model'
    prediction_matrix_csv = stem + '.prediction.csv'

    # Assume input file is a CSV with a header row (the header is not used).
    time_series_data, _header = csv.readCsvFloat2(filename, True)
    total_rows = len(time_series_data)

    # Assume a weekly pattern: train on a whole number of 7-row blocks.
    number_training = (int(float(total_rows) * fraction_training) // 7) * 7
    show('number_training', number_training,
         'fraction_training', fraction_training,
         'len(time_series_data)', total_rows)
    assert number_training > max_lag

    # Transpose so that axis 0 indexes the series and axis 1 the time steps.
    time_series = NP.transpose(NP.array(time_series_data))
    describeNPArray('time_series', time_series)
    training_time_series = NP.transpose(
        NP.array(time_series_data[:number_training]))
    show('training_time_series.shape', training_time_series.shape)

    num_series = training_time_series.shape[0]
    num_rows = training_time_series.shape[1]
    full_length = time_series.shape[1]
    t = NP.arange(full_length)
    training_t = NP.arange(num_rows)

    # Day-of-week selection is decided on the training data only, then
    # applied to both the training range and the full range.
    days_to_keep = [getDaysOfWeekToKeep(series)
                    for series in training_time_series]
    masks = [getDaysOfWeekMask(days, full_length) for days in days_to_keep]
    training_masks = [getDaysOfWeekMask(days, num_rows)
                      for days in days_to_keep]

    # Trends are fitted on the training range and reused for the full range.
    trends = [getTrend(training_t, series, mask)
              for series, mask in zip(training_time_series, training_masks)]
    detrended_rows = [
        removeTrend1D(trend, training_t, series, mask)
        for trend, series, mask
        in zip(trends, training_time_series, training_masks)]
    for index, row in enumerate(detrended_rows):
        describeNPVector('x[%0d]' % index, row)

    # Pack the per-series vectors into one 2-D array.
    detrended_training_time_series = NP.zeros(
        [num_series, detrended_rows[0].shape[0]])
    show('detrended_training_time_series.shape',
         detrended_training_time_series.shape)
    for index, row in enumerate(detrended_rows):
        show('x[%0d].shape' % index, row.shape)
        detrended_training_time_series[index, :] = row
    show('detrended_training_time_series.shape',
         detrended_training_time_series.shape)

    for index, row in enumerate(detrended_training_time_series):
        describeNPVector('detrended_training_time_series[%0d]' % index, row)

    # Build the regression matrix, train the model, read the result back.
    means, stddevs = timeSeriesToMatrixCsv(
        regression_matrix_csv, detrended_training_time_series,
        training_masks, max_lag)
    show('means', means)
    show('stddevs', stddevs)
    run_weka.runMLPTrain(regression_matrix_csv, results_filename,
                         model_filename, True, '-H 4')
    coefficients = run_weka.getCoefficients(results_filename)

    for label, values in (('coefficients', coefficients),
                          ('means', means),
                          ('stddevs', stddevs)):
        print(separator)
        show(label, len(values))
        print(values)
    print(separator)

    # Detrend the full series with the trends fitted on the training range.
    detrended_full_x = [
        removeTrend1D(trend, t, series, mask)
        for trend, series, mask in zip(trends, time_series, masks)]
    # Only the shape of this array is reported; it is not filled in.
    detrended_time_series = NP.zeros(
        [num_series, detrended_full_x[0].shape[0]])
    show('detrended_time_series.shape', detrended_time_series.shape)
    for index, row in enumerate(detrended_full_x):
        show('full_x[%0d].shape' % index, row.shape)

    # Predict in detrended space, then put the trend of series 1 back.
    detrended_predictions = predictTimeSeries(
        coefficients, means, stddevs, t,
        detrended_full_x[0], detrended_full_x[1],
        number_training, max_lag, masks)
    predictions = addTrend1D(trends[1], t, detrended_predictions, masks[1])

    print(separator)
    show('predictions =', predictions.shape)

    full_x = [NP.array(series) for series in time_series]
    show('t.shape', t.shape)
    show('full_x[0].shape', full_x[0].shape)
    show('full_x[1].shape', full_x[1].shape)
    show('predictions.shape', predictions.shape)

    predicted_time_series = NP.vstack([t, full_x[0], full_x[1], predictions])
    show('predicted_time_series.shape', predicted_time_series.shape)

    # One output row per time step: t, x, y, y_pred, all as strings.
    prediction_header = ['t', 'x', 'y', 'y_pred']
    predicted_time_series_data = [
        [str(value) for value in column]
        for column in NP.transpose(predicted_time_series)]
    csv.writeCsv(prediction_matrix_csv, predicted_time_series_data,
                 prediction_header)