import csv
from functools import partial
from multiprocessing import Pool


def performEventDurationTest():
    # Sweep the windowed event detector over window sizes and threshold
    # quantiles, recording how long the detected event containing 2012-10-31
    # lasts under each configuration.
    mahal_timeseries = readOutlierScores("results/outlier_scores.csv")
    global_pace_timeseries = readGlobalPace("4year_features")
    zscore_timeseries = readZScoresTimeseries("results/zscore.csv")

    mahal_timeseries_fine = readOutlierScores(
        "results/link_20_normalize_outlier_scores.csv")

    threshold_vals = [.90, .91, .92, .93, .94, .95, .96, .97, .98, .99]
    window_sizes = [1, 2, 3, 4, 6, 8, 12, 24]
    with open('results/threshold_experiment.csv', 'w') as f:
        w = csv.writer(f)
        w.writerow(['granularity', 'window', 'threshold', 'duration'])

        for window_size in window_sizes:
            for threshold in threshold_vals:
                print(window_size, threshold)
                # Coarse-grained outlier scores
                events = detectWindowedEvents(mahal_timeseries,
                                              zscore_timeseries,
                                              global_pace_timeseries,
                                              "results/events_windowed.csv",
                                              window_size=window_size,
                                              threshold_quant=threshold)
                duration = getEventDuration(events, "2012-10-31")
                w.writerow(["coarse", window_size, threshold, duration])

                # Fine-grained (link-level, normalized) outlier scores
                events = detectWindowedEvents(
                    mahal_timeseries_fine,
                    zscore_timeseries,
                    global_pace_timeseries,
                    "results/link_20_normalize_events_windowed.csv",
                    window_size=window_size,
                    threshold_quant=threshold)
                duration = getEventDuration(events, "2012-10-31")
                w.writerow(["fine", window_size, threshold, duration])
def process_events(outlier_score_file, feature_dir, output_file):
    # Run the HMM-based event detector, then save both the detected events and
    # an augmented copy of the outlier scores.
    mahal_timeseries, c_timeseries = readOutlierScores(outlier_score_file)
    global_pace_timeseries = readGlobalPace(feature_dir)

    events, predictions = detect_events_hmm(mahal_timeseries, c_timeseries,
                                            global_pace_timeseries)

    # e.g. "results/hmm_events.csv" -> "results/hmm_events_scores.csv"
    new_scores_file = output_file.split(".")[0] + "_scores.csv"
    augment_outlier_scores(outlier_score_file, new_scores_file, predictions)

    with open(output_file, 'w') as f:
        w = csv.writer(f)
        w.writerow(['event', 'start_date', 'end_date', 'duration',
                    'max_mahal', 'max_pace_dev', 'min_pace_dev'])
        for line in events:
            # Event names are not known at this point, so mark them with '?'
            w.writerow(['?'] + line)
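
# A usage sketch for process_events(), reusing input locations that appear
# elsewhere in this file; the output path is a hypothetical choice:
#
#     process_events("results/outlier_scores.csv", "4year_features",
#                    "results/hmm_events.csv")
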
def run_sims_in_parallel(outlier_score_file, feature_dir, output_file):
    # Split 10,000 simulations across 8 worker processes (8 chunks of 1,250)
    # and collect the detected events from every chunk into one CSV.
    mahal_timeseries, c_timeseries = readOutlierScores(outlier_score_file)
    global_pace_timeseries = readGlobalPace(feature_dir)

    sim_function = partial(run_many_simulations,
                           mahal_timeseries=mahal_timeseries,
                           c_timeseries=c_timeseries,
                           global_pace_timeseries=global_pace_timeseries)
    sim_sizes = [1250] * 8

    # Use the pool as a context manager so the workers are cleaned up
    with Pool(8) as pool:
        result = pool.map(sim_function, sim_sizes)

    with open(output_file, 'w') as f:
        w = csv.writer(f)
        w.writerow(['event', 'start_date', 'end_date', 'duration',
                    'max_pace_dev', 'min_pace_dev'])
        for chunk in result:
            w.writerows(chunk)
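
# pool.map() passes each element of sim_sizes as the first positional
# argument, so run_many_simulations (defined elsewhere) presumably takes a
# simulation count first, with the three timeseries bound via partial() as
# keywords. A hypothetical stub showing the assumed signature:
#
#     def run_many_simulations(num_sims, mahal_timeseries=None,
#                              c_timeseries=None, global_pace_timeseries=None):
#         rows = []
#         for _ in range(num_sims):
#             ...  # simulate, detect, and append one row per detected event
#         return rows
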
def run_random_sims(outlier_score_file, feature_dir):
    # Re-run the HMM event detector 50 times with randomly drawn parameters,
    # saving the augmented score file from each run for later comparison.
    mahal_timeseries, c_timeseries = readOutlierScores(outlier_score_file)
    global_pace_timeseries = readGlobalPace(feature_dir)

    for p in range(50):
        print("Sim %d" % p)
        initial_state, trans_matrix, emission_matrix = randomly_draw_parameters()

        events, predictions = detect_events_hmm(mahal_timeseries, c_timeseries,
                                                global_pace_timeseries,
                                                threshold_quant=.95,
                                                trans_matrix=trans_matrix,
                                                emission_matrix=emission_matrix)
        new_scores_file = 'tmp_results/coarse_events_k%d_scores.csv' % p

        augment_outlier_scores(outlier_score_file, new_scores_file, predictions)
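
# randomly_draw_parameters() is defined elsewhere. A minimal sketch of what it
# could look like for a small HMM - row-stochastic matrices drawn by Dirichlet
# sampling; the state and emission counts are assumptions, and the name is
# hypothetical so it does not shadow the real helper:
def example_randomly_draw_parameters(num_states=2, num_emissions=2, seed=None):
    import numpy as np  # local import; the rest of this file does not use numpy
    rng = np.random.default_rng(seed)
    initial_state = rng.dirichlet(np.ones(num_states))  # starting distribution
    trans_matrix = rng.dirichlet(np.ones(num_states), size=num_states)
    emission_matrix = rng.dirichlet(np.ones(num_emissions), size=num_states)
    return initial_state, trans_matrix, emission_matrix
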
if(__name__=="__main__"):
    #performEventDurationTest()    
    
    mahal_timeseries = readOutlierScores("results/outlier_scores.csv")
    global_pace_timeseries = readGlobalPace("4year_features")
    zscore_timeseries = readZScoresTimeseries("results/zscore.csv")
    detectWindowedEvents(mahal_timeseries, zscore_timeseries, global_pace_timeseries, 
                          "results/events_windowed.csv", window_size=8, threshold_quant=.95)
    
    mahal_timeseries = readOutlierScores("results/link_20_normalize_outlier_scores.csv")
    global_pace_timeseries = readGlobalPace("4year_features")
    zscore_timeseries = readZScoresTimeseries("results/zscore.csv")
    detectWindowedEvents(mahal_timeseries, zscore_timeseries, global_pace_timeseries, 
                          "results/link_20_normalize_events_windowed.csv", window_size=8,
                          threshold_quant=.95)                  
                          
                          
    logMsg("done")
# Returns:
#   A dictionary of standardized pace vectors (one list of floats per entry),
#   keyed by (date, hour, weekday)
def readZScoresTimeseries(filename):
    with open(filename, "r") as f:
        r = csv.reader(f)
        next(r)  # skip the header row
        timeseries = {}
        for line in r:
            (date, hour, weekday) = line[0:3]
            hour = int(hour)
            # Wrap map() in list() so the values are reusable under Python 3
            timeseries[(date, hour, weekday)] = list(map(float, line[3:]))
    return timeseries
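
# For reference, readZScoresTimeseries() expects a CSV whose first three
# columns are date, hour, and weekday, followed by one z-score per pace-vector
# component. A hypothetical first few lines (names and values illustrative):
#
#     date,hour,weekday,zscore_0,zscore_1,zscore_2
#     2012-10-28,0,Sunday,-0.21,0.05,1.32
#     2012-10-28,1,Sunday,0.44,-0.12,0.98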

#########################################################################################################
################################### MAIN CODE BEGINS HERE ###############################################
#########################################################################################################
if __name__ == "__main__":

    # Read the previous results from file
    mahal_timeseries = readOutlierScores(OUTLIER_SCORE_FILE)
    global_pace_timeseries = readGlobalPace(FEATURE_DIR)
    zscore_timeseries = readZScoresTimeseries(ZSCORE_FILE)

    # Perform the event detection on the outlier scores, using extra info to
    # describe the events. Events are detected where R(t) falls below a
    # quantile-based cutoff (threshold_quant=.90), and events less than
    # 6 hours apart are merged.
    detectEventsSwitching(mahal_timeseries,
                          zscore_timeseries,
                          global_pace_timeseries,
                          OUT_UNFILTERED_EVENTS,
                          OUT_FILTERED_EVENTS,
                          min_event_spacing=6,
                          threshold_quant=.90)
    logMsg("Done.")