Example #1
0
def saveEvents(timeSegments, mahal_timeseries, zscore_timeseries, global_pace_timeseries,
               out_file, sorted_mahal=None, mahal_threshold=None):
    """Compute properties for every flagged time segment and write them to a CSV file.

    Params:
        timeSegments - iterable of segments.  Each segment exposes .state, .start_id and
            .end_id; the container exposes .sorted_dates, which is indexed by those ids
            to obtain the timeseries keys of the segment boundaries
        mahal_timeseries, zscore_timeseries - forwarded to computeEventProperties()
        global_pace_timeseries - forwarded to computeEventProperties() and also used to
            derive the expected pace via getExpectedPace()
        out_file - path of the CSV file to write (opened in "w" mode, so it is overwritten)
        sorted_mahal, mahal_threshold - optional, forwarded unchanged to
            computeEventProperties()
    Returns:
        None - the only output is the CSV file
    """
    #Compute expected pace; the standard deviation is also returned but is not used here
    (expected_pace_timeseries, _sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    #Iterate through the TimeSegments and collect the ones flagged as events
    eventList = []
    for segment in timeSegments:
        #Only segments whose state compares equal to True are events
        if(segment.state==True):
            start_key = timeSegments.sorted_dates[segment.start_id]
            end_key = timeSegments.sorted_dates[segment.end_id]
            #Compute event properties
            event = computeEventProperties(start_key, end_key, mahal_timeseries,
                                           global_pace_timeseries, expected_pace_timeseries,
                                           zscore_timeseries, sorted_mahal=sorted_mahal,
                                           mahal_threshold=mahal_threshold)
            eventList.append(event)

    #Sort events in descending order of field 5, which is hours_above_thresh according to
    #the unpacking below.  NOTE(review): the previous comment said "duration", but duration
    #is field 4 - confirm which ordering is intended.
    eventList.sort(key = lambda x: x[5], reverse=True)

    #Write events to a CSV file.  The with-block guarantees the file is flushed and closed
    #even if formatting one of the rows raises.
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow(["start_date", "end_date", "max_mahal", "mahal_quant", "duration", "hours_above_thresh", "max_pace_dev",
                    "min_pace_dev", "worst_trip"])

        for event in eventList:
            [start_date, end_date, max_mahal, mahal_quant, duration, hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip] = event
            #Numeric columns are rounded for readability; the others are written as-is
            formattedEvent = [start_date, end_date, "%.2f" % max_mahal, "%.3f" % mahal_quant,
                              duration, hours_above_thresh, "%.2f" % max_pace_dev,
                              "%.2f" % min_pace_dev, worst_trip]
            w.writerow(formattedEvent)
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries, threshold_quant=.95):
    """Label each time step as event / non-event with a two-state hidden Markov model.

    Params:
        mahal_timeseries - mapping from date key to a value; its sorted keys define the
            order of the observations
        c_timeseries - mapping over the same keys; a value equal to 1 marks an outlier
        global_pace_timeseries - mapping over the same keys, also fed to getExpectedPace()
        threshold_quant - quantile passed to getQuantile() to derive the outlier threshold
    Returns:
        (events, predictions) - events is the list built by get_all_events(), sorted in
        descending order of element 2; predictions is the state sequence from model.decode()
    """
    # Chronological ordering of the timeseries keys
    date_keys = sorted(mahal_timeseries)

    expected_pace_timeseries, _sd_pace_timeseries = getExpectedPace(global_pace_timeseries)

    # Flatten every timeseries into a list aligned with date_keys
    mahal_list = [mahal_timeseries[key] for key in date_keys]
    c_list = [c_timeseries[key] for key in date_keys]
    global_pace_list = [global_pace_timeseries[key] for key in date_keys]
    expected_pace_list = [expected_pace_timeseries[key] for key in date_keys]

    # The outlier threshold is a quantile of the observed values
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # Observation symbols: 1 where the step is an outlier (value above the threshold, or
    # flagged in c_timeseries), 0 otherwise
    symbols = [1 if (mahal > threshold or c_flag == 1) else 0
               for mahal, c_flag in zip(mahal_list, c_list)]

    # Hidden states: 0 models non-event periods, 1 models event periods.
    # The transition matrix is heavily weighted on the diagonal, so the model tends to stay
    # in its current state instead of flipping rapidly - the predictions come out "smooth".
    trans_matrix = array([[.999, .001],
                          [.001, .999]])

    # Emission matrix - state 0 mostly emits symbol 0, while state 1 is more likely to emit
    # symbol 1.  In other words, events are likely to be outliers.
    emission_matrix = array([[.95, .05],
                             [.4, .6]])

    # Build the model and decode the most likely state sequence
    model = MultinomialHMM(n_components=2, transmat=trans_matrix)
    model.emissionprob_ = emission_matrix
    _log_likelihood, predictions = model.decode(symbols)

    events = get_all_events(predictions, date_keys, mahal_list, global_pace_list,
                            expected_pace_list)

    # Largest element 2 first (presumably the event duration - see get_all_events)
    events.sort(key=lambda ev: ev[2], reverse=True)
    return events, predictions
Example #3
0
def saveEvents(timeSegments,
               mahal_timeseries,
               zscore_timeseries,
               global_pace_timeseries,
               out_file,
               sorted_mahal=None,
               mahal_threshold=None):
    """Compute properties for every flagged time segment and write them to a CSV file.

    Params:
        timeSegments - iterable of segments.  Each segment exposes .state,
            .start_id and .end_id; the container exposes .sorted_dates, which
            is indexed by those ids to obtain the segment's boundary keys
        mahal_timeseries, zscore_timeseries - forwarded to
            computeEventProperties()
        global_pace_timeseries - forwarded to computeEventProperties() and
            also used to derive the expected pace via getExpectedPace()
        out_file - path of the CSV file to write (opened in "w" mode, so an
            existing file is overwritten)
        sorted_mahal, mahal_threshold - optional, forwarded unchanged to
            computeEventProperties()
    Returns:
        None - the only output is the CSV file
    """
    #Compute expected pace; the standard deviation is returned too but unused
    (expected_pace_timeseries,
     _sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    #Iterate through the TimeSegments and collect the ones flagged as events
    eventList = []
    for segment in timeSegments:
        #Only segments whose state compares equal to True are events
        if (segment.state == True):
            start_key = timeSegments.sorted_dates[segment.start_id]
            end_key = timeSegments.sorted_dates[segment.end_id]
            #Compute event properties
            event = computeEventProperties(start_key,
                                           end_key,
                                           mahal_timeseries,
                                           global_pace_timeseries,
                                           expected_pace_timeseries,
                                           zscore_timeseries,
                                           sorted_mahal=sorted_mahal,
                                           mahal_threshold=mahal_threshold)
            eventList.append(event)

    #Sort events in descending order of field 5, which is hours_above_thresh
    #according to the unpacking below.  NOTE(review): the previous comment
    #said "duration", but duration is field 4 - confirm which is intended.
    eventList.sort(key=lambda x: x[5], reverse=True)

    #Write events to a CSV file.  The with-block guarantees the file is
    #flushed and closed even if formatting one of the rows raises.
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow([
            "start_date", "end_date", "max_mahal", "mahal_quant", "duration",
            "hours_above_thresh", "max_pace_dev", "min_pace_dev", "worst_trip"
        ])

        for event in eventList:
            [
                start_date, end_date, max_mahal, mahal_quant, duration,
                hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip
            ] = event
            #Numeric columns are rounded; the others are written as-is
            formattedEvent = [
                start_date, end_date,
                "%.2f" % max_mahal,
                "%.3f" % mahal_quant, duration, hours_above_thresh,
                "%.2f" % max_pace_dev,
                "%.2f" % min_pace_dev, worst_trip
            ]
            w.writerow(formattedEvent)
def detect_events_hmm(mahal_timeseries, c_timeseries, global_pace_timeseries,
                      threshold_quant=.95, trans_matrix = DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX, initial_state=None):
    """Label each time step as event / non-event with a two-state hidden Markov model.

    Params:
        mahal_timeseries - mapping from date key to a value; its sorted keys define the
            order of the observations
        c_timeseries - mapping over the same keys; a value equal to 1 marks an outlier
        global_pace_timeseries - mapping over the same keys, also fed to getExpectedPace()
        threshold_quant - quantile passed to getQuantile() to derive the outlier threshold
        trans_matrix - passed to MultinomialHMM as transmat
        emission_matrix - assigned to the model's emissionprob_ attribute
        initial_state - passed to MultinomialHMM as startprob
    Returns:
        (events, predictions) - events is the list built by get_all_events(), sorted in
        descending order of element 2; predictions is the state sequence from model.decode()
    """
    # Chronological ordering of the timeseries keys
    ordered_keys = sorted(mahal_timeseries)

    expected_pace_timeseries, _sd_pace_timeseries = getExpectedPace(global_pace_timeseries)

    # Flatten every timeseries into a list aligned with ordered_keys
    mahal_list = [mahal_timeseries[k] for k in ordered_keys]
    c_list = [c_timeseries[k] for k in ordered_keys]
    global_pace_list = [global_pace_timeseries[k] for k in ordered_keys]
    expected_pace_list = [expected_pace_timeseries[k] for k in ordered_keys]

    # The outlier threshold is a quantile of the observed values
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # Observation symbols: 1 where the step is an outlier (value above the threshold, or
    # flagged in c_timeseries), 0 otherwise
    symbols = [1 if (value > threshold or flag == 1) else 0
               for value, flag in zip(mahal_list, c_list)]

    # Configure the two-state model with the caller-supplied matrices
    model = MultinomialHMM(n_components=2, transmat=trans_matrix, startprob=initial_state)
    model.emissionprob_ = emission_matrix

    # Decode the most likely state sequence; the log-likelihood is not needed
    _log_likelihood, predictions = model.decode(symbols)

    events = get_all_events(predictions, ordered_keys, mahal_list, global_pace_list,
                            expected_pace_list)

    # Largest element 2 first (presumably the event duration - see get_all_events)
    events.sort(key=lambda ev: ev[2], reverse=True)
    return events, predictions
Example #5
0
def detect_events_hmm(mahal_timeseries,
                      c_timeseries,
                      global_pace_timeseries,
                      threshold_quant=.95,
                      trans_matrix=DEFAULT_TRANS_MATRIX,
                      emission_matrix=DEFAULT_EMISSION_MATRIX,
                      initial_state=None):
    """Run a two-state hidden Markov model over the outlier indicators.

    Params:
        mahal_timeseries - mapping from date key to a value; the sorted keys
            fix the order of the observations
        c_timeseries - mapping over the same keys; entries equal to 1 are
            treated as outliers regardless of the threshold
        global_pace_timeseries - mapping over the same keys; also the input
            of getExpectedPace()
        threshold_quant - quantile handed to getQuantile() to obtain the
            outlier threshold
        trans_matrix - handed to MultinomialHMM as transmat
        emission_matrix - stored on the model as emissionprob_
        initial_state - handed to MultinomialHMM as startprob
    Returns:
        (events, predictions) - the get_all_events() result sorted in
        descending order of element 2, and the sequence from model.decode()
    """
    #Keys of the timeseries in chronological order
    timeline = sorted(mahal_timeseries)

    (expected_pace_timeseries,
     _sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    #One list per timeseries, all aligned with the timeline
    mahal_list = [mahal_timeseries[t] for t in timeline]
    c_list = [c_timeseries[t] for t in timeline]
    global_pace_list = [global_pace_timeseries[t] for t in timeline]
    expected_pace_list = [expected_pace_timeseries[t] for t in timeline]

    #The threshold is the requested quantile of the observed values
    threshold = getQuantile(sorted(mahal_list), threshold_quant)

    # One symbol per time step: 1 for an outlier (above the threshold or
    # flagged in c_timeseries), 0 otherwise
    symbols = [
        1 if (observed > threshold or marker == 1) else 0
        for observed, marker in zip(mahal_list, c_list)
    ]

    # Set up the hmm with the supplied matrices
    model = MultinomialHMM(n_components=2,
                           transmat=trans_matrix,
                           startprob=initial_state)
    model.emissionprob_ = emission_matrix

    # Decode the state sequence; the log-likelihood is discarded
    _log_likelihood, predictions = model.decode(symbols)

    events = get_all_events(predictions, timeline, mahal_list,
                            global_pace_list, expected_pace_list)

    # Largest element 2 first (presumably the duration - see get_all_events)
    events.sort(key=lambda item: item[2], reverse=True)
    return events, predictions
def detectWindowedEvents(mahal_timeseries, zscore_timeseries, global_pace_timeseries,
                          out_file, window_size=6, threshold_quant=.95,
                          start_date=None, end_date=None):
    """Detect events by scanning fixed-size windows and write them to a CSV file.

    Consecutive windows for which crossesThreshold() is true are merged into one event.
    An event is emitted when the first window that does not cross the threshold follows it.

    Params:
        mahal_timeseries - mapping from date key to a value; used to derive the threshold
            and passed to crossesThreshold() and computeEventProperties()
        zscore_timeseries - forwarded to computeEventProperties()
        global_pace_timeseries - forwarded to computeEventProperties() and used to derive
            the expected pace via getExpectedPace()
        out_file - path of the CSV file to write (opened in "w" mode, so it is overwritten)
        window_size - window length in hours
        threshold_quant - quantile passed to getQuantile() to derive the threshold
        start_date - datetime at which the scan begins; defaults to datetime(2010,1,1)
        end_date - datetime at which the scan stops; defaults to datetime(2014,1,1)
    Returns:
        The list of events (as produced by computeEventProperties()), sorted in
        descending order of field 5
    """
    logMsg("Detecting events at %d%% bound" % int(threshold_quant*100))

    #Fall back to the historical hard-coded scan range when no bounds are given
    if start_date is None:
        start_date = datetime(2010,1,1)
    if end_date is None:
        end_date = datetime(2014,1,1)

    #Sort the keys of the timeseries chronologically
    sorted_dates = sorted(mahal_timeseries)

    #Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]

    #Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)

    #Get the expected global pace; the standard deviation is returned too but unused
    (expected_pace_timeseries, _sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    shift = timedelta(hours=window_size)

    prev_above_threshold = False
    current_event_start = None
    current_event_end = None
    eventList = []
    for date in dateRange(start_date, end_date, shift):
        window_end = date + shift
        if(crossesThreshold(date, window_end, mahal_timeseries, threshold)):
            #A crossing right after a quiet window opens a new event
            if(not prev_above_threshold):
                current_event_start = date

            #Every crossing window extends the current event
            current_event_end = window_end
            prev_above_threshold = True
        else:
            #The first quiet window after an event closes it
            if(prev_above_threshold):
                start_key = keyFromDatetime(current_event_start)
                end_key = keyFromDatetime(current_event_end)
                event = computeEventProperties(start_key, end_key, mahal_timeseries,
                                           global_pace_timeseries, expected_pace_timeseries,
                                           zscore_timeseries, sorted_mahal=sorted_mahal,
                                           mahal_threshold=threshold)
                eventList.append(event)

            prev_above_threshold = False

    #NOTE(review): an event that is still open when the scan range ends is never emitted,
    #because events are only closed by a following quiet window.  Confirm whether that is
    #intended before flushing it here.

    #Sort events in descending order of field 5, which is hours_above_thresh according to
    #the unpacking below.  NOTE(review): the previous comment said "duration", but duration
    #is field 4 - confirm which ordering is intended.
    eventList.sort(key = lambda x: x[5], reverse=True)

    #Write events to a CSV file.  The with-block guarantees the file is flushed and closed
    #even if formatting one of the rows raises.
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow(["start_date", "end_date", "max_mahal", "mahal_quant", "duration", "hours_above_thresh", "max_pace_dev",
                    "min_pace_dev", "worst_trip"])

        for event in eventList:
            #Distinct names so the scan bounds start_date / end_date are not rebound
            [ev_start, ev_end, max_mahal, mahal_quant, duration, hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip] = event
            formattedEvent = [ev_start, ev_end, "%.2f" % max_mahal, "%.3f" % mahal_quant,
                              duration, hours_above_thresh, "%.2f" % max_pace_dev,
                              "%.2f" % min_pace_dev, worst_trip]
            w.writerow(formattedEvent)

    return eventList
def detectWindowedEvents(mahal_timeseries,
                         zscore_timeseries,
                         global_pace_timeseries,
                         out_file,
                         window_size=6,
                         threshold_quant=.95,
                         start_date=None,
                         end_date=None):
    """Detect events by scanning fixed-size windows and write them to a CSV file.

    Consecutive windows for which crossesThreshold() is true are merged into
    one event.  An event is emitted when the first window that does not cross
    the threshold follows it.

    Params:
        mahal_timeseries - mapping from date key to a value; used to derive
            the threshold and passed to crossesThreshold() and
            computeEventProperties()
        zscore_timeseries - forwarded to computeEventProperties()
        global_pace_timeseries - forwarded to computeEventProperties() and
            used to derive the expected pace via getExpectedPace()
        out_file - path of the CSV file to write (opened in "w" mode, so an
            existing file is overwritten)
        window_size - window length in hours
        threshold_quant - quantile passed to getQuantile() to derive the
            threshold
        start_date - datetime at which the scan begins; defaults to
            datetime(2010, 1, 1)
        end_date - datetime at which the scan stops; defaults to
            datetime(2014, 1, 1)
    Returns:
        The list of events (as produced by computeEventProperties()), sorted
        in descending order of field 5
    """
    logMsg("Detecting events at %d%% bound" % int(threshold_quant * 100))

    #Fall back to the historical hard-coded scan range when no bounds given
    if start_date is None:
        start_date = datetime(2010, 1, 1)
    if end_date is None:
        end_date = datetime(2014, 1, 1)

    #Sort the keys of the timeseries chronologically
    sorted_dates = sorted(mahal_timeseries)

    #Generate the list of values of R(t)
    mahal_list = [mahal_timeseries[d] for d in sorted_dates]

    #Use the quantile to determine the threshold
    sorted_mahal = sorted(mahal_list)
    threshold = getQuantile(sorted_mahal, threshold_quant)

    # Get the expected global pace; the standard deviation is unused here
    (expected_pace_timeseries,
     _sd_pace_timeseries) = getExpectedPace(global_pace_timeseries)

    shift = timedelta(hours=window_size)

    prev_above_threshold = False
    current_event_start = None
    current_event_end = None
    eventList = []
    for date in dateRange(start_date, end_date, shift):
        window_end = date + shift
        if (crossesThreshold(date, window_end, mahal_timeseries, threshold)):
            #A crossing right after a quiet window opens a new event
            if (not prev_above_threshold):
                current_event_start = date

            #Every crossing window extends the current event
            current_event_end = window_end
            prev_above_threshold = True
        else:
            #The first quiet window after an event closes it
            if (prev_above_threshold):
                start_key = keyFromDatetime(current_event_start)
                end_key = keyFromDatetime(current_event_end)
                event = computeEventProperties(start_key,
                                               end_key,
                                               mahal_timeseries,
                                               global_pace_timeseries,
                                               expected_pace_timeseries,
                                               zscore_timeseries,
                                               sorted_mahal=sorted_mahal,
                                               mahal_threshold=threshold)
                eventList.append(event)

            prev_above_threshold = False

    #NOTE(review): an event that is still open when the scan range ends is
    #never emitted, because events are only closed by a following quiet
    #window.  Confirm whether that is intended before flushing it here.

    #Sort events in descending order of field 5, which is hours_above_thresh
    #according to the unpacking below.  NOTE(review): the previous comment
    #said "duration", but duration is field 4 - confirm which is intended.
    eventList.sort(key=lambda x: x[5], reverse=True)

    #Write events to a CSV file.  The with-block guarantees the file is
    #flushed and closed even if formatting one of the rows raises.
    with open(out_file, "w") as csv_file:
        w = csv.writer(csv_file)
        w.writerow([
            "start_date", "end_date", "max_mahal", "mahal_quant", "duration",
            "hours_above_thresh", "max_pace_dev", "min_pace_dev", "worst_trip"
        ])

        for event in eventList:
            #Distinct names so the scan bounds start_date / end_date are
            #not rebound
            [
                ev_start, ev_end, max_mahal, mahal_quant, duration,
                hours_above_thresh, max_pace_dev, min_pace_dev, worst_trip
            ] = event
            formattedEvent = [
                ev_start, ev_end,
                "%.2f" % max_mahal,
                "%.3f" % mahal_quant, duration, hours_above_thresh,
                "%.2f" % max_pace_dev,
                "%.2f" % min_pace_dev, worst_trip
            ]
            w.writerow(formattedEvent)

    return eventList