Exemple #1
0
def test3():
    """Print every coefficient returned by run_weka.getCoefficients.

    Reads one fixed, hard-coded results file and writes one line per
    coefficient in the form '<index> : <coefficient>'.

    Assumes getCoefficients returns an indexable sequence (the original
    loop indexed it with 0..len-1) -- TODO confirm against run_weka.
    """
    coefficients = run_weka.getCoefficients(r'C:\dev\exercises\time_series_purchases_99_other_001_lag_05.results') 
    for i, coefficient in enumerate(coefficients):
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s : %s' % (i, coefficient))
Exemple #2
0
def _printFields(*fields):
    """Print 'fields' on one line, separated by single spaces.

    The fields are joined into one string first so that the output is the
    same under the Python 2 print statement and the Python 3 print function.
    """
    print(' '.join([str(field) for field in fields]))


def analyzeTimeSeries(filename, max_lag, fraction_training):
    """ Main function.
        Analyze the time series in 'filename', build a model with up to
        max_lag lags from the first part of the data and write predictions
        for the whole series.

        The first fraction_training of the rows (rounded down to a whole
        number of 7-row weeks) is used for training and the remainder for
        testing.

        Steps, in order:
          1. Read the data with csv.readCsvFloat2 and transpose it so that
             each row of the array is one series.
          2. Per series: pick days of week to keep, build masks, fit a trend
             on the training rows and remove it.
          3. Pass the detrended training series to timeSeriesToMatrixCsv,
             then call run_weka.runMLPTrain and run_weka.getCoefficients.
          4. Detrend the full series with the training trends, call
             predictTimeSeries, and add the trend of series 1 back.
          5. Write t, series 0, series 1 and the predictions with
             csv.writeCsv.

        File names are derived from 'filename' by replacing its extension:
        '.regression.csv', '.results', '.model' and '.prediction.csv'.

        NOTE: steps 4 and 5 index series 0 and series 1 directly, so the
        input needs at least two data columns; presumably column 0 is the
        predictor 'x' and column 1 the predicted 'y' (the output header is
        t, x, y, y_pred) -- confirm against predictTimeSeries.

        Args:
            filename: Path of the input file (assumed to be a CSV with a
                header row for now).
            max_lag: Maximum lag, passed on to timeSeriesToMatrixCsv and
                predictTimeSeries. Must be smaller than the number of
                training rows.
            fraction_training: Fraction of the rows used for training.

        Raises:
            AssertionError: If the number of training rows is not greater
                than max_lag.
    """
    separator = '--------------------------------------------'

    base_name = os.path.splitext(filename)[0]
    regression_matrix_csv = base_name + '.regression.csv'
    results_filename = base_name + '.results'
    model_filename = base_name + '.model'
    prediction_matrix_csv = base_name + '.prediction.csv'

    # Assume input file is a CSV with a header row
    time_series_data, header = csv.readCsvFloat2(filename, True)

    # Assume a weekly pattern: round the training size down to whole weeks
    number_training = (int(float(len(time_series_data))*fraction_training)//7)*7

    _printFields('number_training', number_training,
                 'fraction_training', fraction_training,
                 'len(time_series_data)', len(time_series_data))
    assert number_training > max_lag, \
        'number_training (%s) must be greater than max_lag (%s)' % (number_training, max_lag)

    # Transpose so that axis 0 indexes the series and axis 1 the time steps
    time_series = NP.transpose(NP.array(time_series_data))
    describeNPArray('time_series', time_series)

    training_time_series = NP.transpose(NP.array(time_series_data[:number_training]))
    _printFields('training_time_series.shape', training_time_series.shape)

    t = NP.arange(time_series.shape[1])
    training_t = NP.arange(training_time_series.shape[1])

    num_series = training_time_series.shape[0]
    num_rows = training_time_series.shape[1]
    num_full_rows = time_series.shape[1]

    # Days to keep are chosen from the training rows only and then reused
    # for the masks over the full series
    days_to_keep = [getDaysOfWeekToKeep(series) for series in training_time_series]

    masks = [getDaysOfWeekMask(days, num_full_rows) for days in days_to_keep]
    training_masks = [getDaysOfWeekMask(days, num_rows) for days in days_to_keep]

    trends = [getTrend(training_t, series, mask)
              for series, mask in zip(training_time_series, training_masks)]

    x = [removeTrend1D(trend, training_t, series, mask)
         for trend, series, mask in zip(trends, training_time_series, training_masks)]
    for i, detrended in enumerate(x):
        describeNPVector('x[%0d]'%i, detrended)

    # Stack the detrended training series into one float matrix, one row
    # per series
    detrended_training_time_series = NP.zeros([num_series, x[0].shape[0]])
    _printFields('detrended_training_time_series.shape', detrended_training_time_series.shape)
    for i, detrended in enumerate(x):
        _printFields('x[%0d].shape'%i, detrended.shape)
        detrended_training_time_series[i,:] = detrended
    _printFields('detrended_training_time_series.shape', detrended_training_time_series.shape)

    for i in range(num_series):
        describeNPVector('detrended_training_time_series[%0d]'%i, detrended_training_time_series[i])

    means, stddevs = timeSeriesToMatrixCsv(regression_matrix_csv, detrended_training_time_series, training_masks, max_lag)
    _printFields('means', means)
    _printFields('stddevs', stddevs)
    run_weka.runMLPTrain(regression_matrix_csv, results_filename, model_filename, True, '-H 4')
    coefficients = run_weka.getCoefficients(results_filename)

    _printFields(separator)
    _printFields('coefficients', len(coefficients))
    _printFields(coefficients)
    _printFields(separator)
    _printFields('means', len(means))
    _printFields(means)
    _printFields(separator)
    _printFields('stddevs', len(stddevs))
    _printFields(stddevs)
    _printFields(separator)

    # Detrend the full series with the trends fitted on the training rows
    detrended_full_x = [removeTrend1D(trend, t, series, mask)
                        for trend, series, mask in zip(trends, time_series, masks)]
    # Only the shape is reported; no array of this shape is needed
    _printFields('detrended_time_series.shape', (num_series, detrended_full_x[0].shape[0]))
    for i, detrended in enumerate(detrended_full_x):
        _printFields('full_x[%0d].shape'%i, detrended.shape)

    detrended_predictions = predictTimeSeries(coefficients, means, stddevs, t, detrended_full_x[0], detrended_full_x[1], number_training, max_lag, masks)
    # Predictions get the trend and mask of series 1 added back
    predictions = addTrend1D(trends[1], t, detrended_predictions, masks[1])
    _printFields(separator)
    _printFields('predictions =', predictions.shape)

    full_x = [NP.array(series) for series in time_series]

    _printFields('t.shape', t.shape)
    _printFields('full_x[0].shape', full_x[0].shape)
    _printFields('full_x[1].shape', full_x[1].shape)
    _printFields('predictions.shape', predictions.shape)

    predicted_time_series = NP.vstack([t, full_x[0], full_x[1], predictions])

    _printFields('predicted_time_series.shape', predicted_time_series.shape)

    prediction_header = ['t', 'x', 'y', 'y_pred']
    # Transpose back to one output row per time step, with every value
    # converted to a string
    predicted_time_series_data = [[str(value) for value in row]
                                  for row in predicted_time_series.T]

    csv.writeCsv(prediction_matrix_csv, predicted_time_series_data, prediction_header)