Example #1
def change_point(connect_df, pen=None, epsilon=None, nbkp=1):
    '''
    Change point analysis for either the connectivity method or graph metrics.

    Cost models: 'l1', 'l2' (least squares) or 'rbf' (radial basis functions).

    Example:
        a.create_posjac()
        t = connectivity(a, inorganics, plot=2)
        r = np.log10(t[t.sum().sort_values(ascending=False).index])
        change_point(np.array(r).T[:, ::50])
    '''
    import ruptures as rpt
    import matplotlib.pyplot as plt
    import numpy as np
    print('calculating change points')
    # detection
    signal = np.array(connect_df)
    algo = rpt.Binseg(model='rbf').fit(signal)
    # alternative: rpt.Binseg(model='l2', custom_cost=None, min_size=1, jump=1, params=None).fit(signal)
    result = algo.fit_predict(signal, n_bkps=nbkp)
    print(result)
    #algo.predict(pen=pen)

    # display
    rpt.display(signal, result, np.array(result) / 2)
    plt.show()
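
The pen and epsilon parameters above are accepted but never used (the penalized call is left commented out). A minimal sketch, not from the original source, of how they could be wired into the prediction step; Binseg.predict accepts exactly one of n_bkps, pen, or epsilon:

import numpy as np
import ruptures as rpt

def change_point_sketch(connect_df, pen=None, epsilon=None, nbkp=1):
    # same rbf-cost binary segmentation as above
    signal = np.array(connect_df)
    algo = rpt.Binseg(model='rbf').fit(signal)
    if pen is not None:
        return algo.predict(pen=pen)          # penalized number of breakpoints
    if epsilon is not None:
        return algo.predict(epsilon=epsilon)  # residual-threshold stopping rule
    return algo.predict(n_bkps=nbkp)          # fixed number of breakpoints
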
Example #2
def actvTrnsAutoDetect(Data,
                       Interval,
                       Time='YYYY/MM/DD_HH:MM:SS',
                       Model="mahalanobis",
                       Penalty=50):
    # Interval is in minutes
    ### Not recommended for use: the current detection algorithm is not very robust.
    import ruptures
    import matplotlib.pyplot as plt
    import pandas
    algo = ruptures.Pelt(model=Model).fit(Data)
    result = algo.predict(pen=Penalty)
    times = []
    Ttimes = []
    #plt.plot(Data)
    #plt.xticks(rotation=45)
    #plt.show()
    ruptures.display(Data, result)
    plt.show()
    for entry in result:
        time = Data.index[entry - 1]
        print("Time is: {}".format(time))
        #if entry != 1 and entry != len(Data):
        if True:
            ### The first and last results are usually meaningless, but the filter above is currently disabled.
            times.append(time)
            Ttimes.append([
                time - pandas.Timedelta(minutes=Interval / 2),
                time + pandas.Timedelta(minutes=Interval / 2)
            ])
    return Ttimes
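
A hedged usage sketch: it assumes Data is a pandas DataFrame with a DatetimeIndex (the body's use of Data.index and ruptures.display suggests this); the synthetic signal, the 'l2' cost, and the penalty below are purely illustrative.

import numpy as np
import pandas

# synthetic four-hour signal with a level shift halfway through
idx = pandas.date_range('2021-01-01', periods=240, freq='1min')
vals = np.concatenate([np.random.normal(0, 1, 120), np.random.normal(5, 1, 120)])
data = pandas.DataFrame({'value': vals}, index=idx)

# returns [start, end] windows of width Interval (minutes) centred on each detected change
transition_windows = actvTrnsAutoDetect(data, Interval=15, Model='l2', Penalty=30)
print(transition_windows)
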
Example #4
def get_change_points(log):
    attr_datetime = pm4py.get_attribute_values(log, 'time:timestamp')
    start_date = min(attr_datetime).date()
    end_date = max(attr_datetime).date()
    delta = datetime.timedelta(days=1)
    print("Start date: ", start_date, "\nEnd date: ", end_date)

    event_counts = {}
    i = start_date
    while i <= end_date:
        event_counts[i.strftime('%Y-%m-%d')] = 0
        #print(i)
        i += delta

    #print(event_counts)

    for t in attr_datetime:
        event_counts[t.date().strftime('%Y-%m-%d')] += 1

    dates = np.array(list(event_counts.values()))

    # detection
    algo = rpt.Pelt(model=MODEL).fit(dates)
    detect_result = algo.predict(pen=PENALTY)

    # display
    rpt.display(dates, detect_result, detect_result)
    plt.savefig('change_points.png')
    plt.show()
    print('Change point plot is saved as "change_points.png"')

    return event_counts, detect_result
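
The day-by-day counting loop above can also be written with pandas resampling. A sketch under the assumption that attr_datetime behaves as an iterable of timestamps, as in the loop above; MODEL and PENALTY remain the module-level constants of the original.

import numpy as np
import pandas as pd

def daily_event_counts(attr_datetime):
    # one row per timestamp, resampled to calendar days; days without events sum to 0
    s = pd.Series(1, index=pd.DatetimeIndex(list(attr_datetime))).sort_index()
    return s.resample('D').sum().to_numpy()

# dates = daily_event_counts(attr_datetime)  # should match the dates array built above
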
Example #5
def get_change_points(pos, neu, neg, pen=10, jump=1, min_size=2, plot=False):

    signal = (pos['count'].to_numpy(), neu['count'].to_numpy(),
              neg['count'].to_numpy())

    signal = np.stack(tuple(signal), axis=0).T

    cp = rpt.KernelCPD(kernel="rbf", jump=jump,
                       min_size=min_size).fit_predict(signal, pen=pen)

    if plot:
        rpt.display(signal, cp, cp)
        plt.show()

    return pos.index.values[cp[:-1]]
Example #6
def FindChangePoints(BitScores, penalty, minimumSize, Display=False):
    # This function uses the module Ruptures to detect change points in the bit score map.
    algo = rpt.Pelt(min_size=minimumSize).fit(np.array(BitScores))
    result = algo.predict(pen=penalty)
    # After the change points are found using ruptures, the program makes sure that the last
    # index in the MSA is included as a change point
    if len(BitScores) - 1 not in result:
        result.append(len(BitScores) - 1)
    # Because python starts its counting at 0, if the length of the MSA is included as
    # a result, it will cause an indexing error if it's used. This is why it's removed
    # if it exists.
    if len(BitScores) in result:
        result.remove(len(BitScores))
    # It also makes sure the starting index is in the results
    if 0 not in result:
        result.append(0)
    result = sorted(result)

    # The average bit score value in each partitioned section of the bit score map
    # is then determined
    averages = []
    for changePoint in result:
        # If the last change point is found, then there will not be a change point after it,
        # so the loop ends
        if result.index(changePoint) == len(result) - 1:
            break
        # otherwise, the change point after the change point selected is chosen
        else:
            changePoint2 = result[result.index(changePoint) + 1]
        # The total bit score is set to zero
        total = 0
        # and then each bit score between the two change points is added to the total
        for score in BitScores[changePoint:changePoint2]:
            total += score
        # The average is then found by dividing the total by the length of the interval
        average = total / (changePoint2 - changePoint)
        # and the average is added to the list that will be returned to the user
        averages.append(average)
    # If the user has specified that they would like to see the change points
    # illustrated on the change point map, then the plot is printed.
    if Display:
        rpt.display(np.array(BitScores), result)
        plt.show()

    # The program then returns the list of change points and the average bit score values
    # between them to the user.
    return result, averages
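
The per-segment averaging loop above can be condensed with numpy; a hedged equivalent sketch that takes the same BitScores and the sorted result list and returns the same averages:

import numpy as np

def segment_averages(BitScores, result):
    # mean bit score between each pair of consecutive change points
    scores = np.asarray(BitScores, dtype=float)
    return [scores[a:b].mean() for a, b in zip(result[:-1], result[1:])]
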
Example #7
    def figure(self):
        """
        Returns figure showing changepoints

        :rtype: :class:`matplotlib.figure.Figure`
        """
        fig, ax = rpt.display(self._signal, self.breakpoints())
        return fig
Example #8
def test_1():
    # generate signal
    n_samples, dim, sigma = 1000, 3, 4
    n_bkps = 4  # number of breakpoints
    signal, bkps = rpt.pw_constant(n_samples, dim, n_bkps, noise_std=sigma)

    # detection
    # algo = rpt.Pelt(model="rbf").fit(signal[:,1])

    rpt_data = data_month['sku_num_sum'].values
    # rpt_data = data['sku_num_sum'].values

    algo = rpt.Pelt(model="rbf").fit(rpt_data)
    # algo = rpt.Binseg(model="rbf").fit(rpt_data)
    res_kps = algo.predict(pen=3)

    # display
    # rpt.display(signal, bkps, res_kps)
    rpt.display(rpt_data, res_kps)
    plt.show()
Example #9
def find_change_points(session_vals, penalty=1000):
    vals = np.array(list(itertools.chain(*session_vals)))
    session_lens = [len(x) for x in session_vals]
    total_frames = np.concatenate(([0], np.cumsum(session_lens)))
    algo = rpt.Pelt(model='l2').fit(vals)
    change_points = algo.predict(pen=penalty)
    logging.debug('Changepoints detected: %s', change_points)
    logging.debug('Total frames per session: %s', total_frames)
    session_change_points = [[] for _ in range(len(session_lens))]
    if hasattr(sys, 'ps1'):  # interactive mode
        rpt.display(vals, total_frames, change_points)
        plt.title('Detected change points')
        plt.show(block=True)

    for i, change in enumerate(change_points[:-1]):
        session_i = bisect.bisect_right(total_frames, change) - 1
        session_start_frame = total_frames[session_i]
        session_change_points[session_i].append(change - session_start_frame)

    for i, session_len in enumerate(session_lens):
        session_change_points[i].append(session_len)
    return session_change_points
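
A hedged usage sketch, assuming session_vals is a list of per-session 1-D arrays of frame-level values (the synthetic sessions and the penalty below are illustrative only):

import numpy as np

sessions = [
    np.random.normal(0.0, 1.0, 100),
    np.concatenate([np.random.normal(0.0, 1.0, 50), np.random.normal(4.0, 1.0, 50)]),
    np.random.normal(0.0, 1.0, 80),
]
per_session = find_change_points(sessions, penalty=100)
print(per_session)  # change points as frame offsets within each session; each list ends with the session length
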
Example #10
def segment_time_series(time_series, bkps=1, gamma=1e-2, display=True):
    """
    Implementation of ruptures, python library https://github.com/deepcharles/ruptures
    Usage: Segmentation of time-series data

    Inputs:
        time_series: time series kinematic data from the ReachMaster experimental system
        bkps : int, number of changepoints (reaches), 1 for simple detection
        display: Boolean, variable to set display functionality of ruptures
    Returns:

        rpt_result: List containing breakpoint indexes and the total length of the time-series array
    """

    params = {"gamma": gamma}
    algo = rpt.Dynp(model="rbf", params=params, jump=1,
                    min_size=2).fit(time_series)
    rpt_result = algo.predict(n_bkps=bkps)
    if display:
        # bkps is an int (a count), not a list of indexes, so only the detected breakpoints are displayed
        rpt.display(time_series, rpt_result)
        plt.show()
    return rpt_result
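
A hedged usage example on synthetic data (the piecewise-constant signal below stands in for the ReachMaster kinematics, and display is switched off to keep it non-interactive):

import ruptures as rpt

signal, true_bkps = rpt.pw_constant(n_samples=200, n_features=1, n_bkps=1, noise_std=1)
result = segment_time_series(signal, bkps=1, display=False)
print(true_bkps, result)  # both lists end with the total signal length, 200
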
Example #11
def get_change_points(ts,
                      model="rbf",
                      jump=5,
                      pen=1,
                      segments_size=None,
                      figsize=(20, 3),
                      plot=True):
    if model == "constant":
        change_points = list(
            np.arange(0,
                      len(ts) + 1)[segments_size::segments_size])
        if change_points[-1] < (len(ts)):
            change_points.append(len(ts))
    else:
        algo = rpt.Pelt(model=model, jump=jump).fit(ts)
        change_points = algo.predict(pen=pen)
    if plot:
        rpt.display(ts,
                    true_chg_pts=change_points,
                    computed_chg_pts=change_points,
                    figsize=figsize)
        plt.show()
    return change_points
Example #12
import matplotlib.pyplot as plt
import ruptures as rpt

# generate signal
n_samples, dim, sigma = 1000, 3, 4
n_bkps = 4  # number of breakpoints
signal, bkps = rpt.pw_constant(n_samples, dim, n_bkps, noise_std=sigma)

print(type(signal))

# detection
algo = rpt.Pelt(model="rbf").fit(signal)
result = algo.predict(pen=10)

# display
rpt.display(signal, bkps, result)
plt.show()
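
Because this snippet has both the true breakpoints (bkps) and the detected ones (result), the detection can also be scored. A short sketch using the metrics shipped with ruptures (assuming a recent ruptures version):

from ruptures.metrics import hausdorff, precision_recall, randindex

print("Hausdorff distance:", hausdorff(bkps, result))
prec, rec = precision_recall(bkps, result, margin=10)
print("Precision / recall (margin=10):", prec, rec)
print("Rand index:", randindex(bkps, result))
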
Example #13
    def change_point(self,
                     width: int,
                     cut_off: list,
                     custom_cost,
                     jump: int,
                     pen: float,
                     results_show: bool,
                     title=None,
                     save_path=None,
                     fig_name=None):
        '''                
        ----------------
        DESCRIPTION
        ----------------
        The purpose of change point detection is to check whether there is a sufficiently large, sudden change
        within a specific interval of the resistance signal.
        If there is, the expulsion (spatter, "Spritzer") phenomenon occurred during this weld.
        The detection algorithms are documented here:

        https://centre-borelli.github.io/ruptures-docs/index.html#documentation

        For the resistance data, especially MDK2, this method can be used to detect a change point in the selected area.
        If there is a change point, the mean values before and after it are compared --> delta R;
        if there is no change point, delta R = 0.
        Because of material loss, delta R must be greater than 0. If there is a change point but delta R < 0,
        that situation has nothing to do with spatter and rarely occurs; delta R can also be 0.
        As usual, the resistance curve decreases over time.
        ----------------
        PARAMETER
        ----------------
        width: int, sliding window width, e.g. 40
        cut_off: list [float, float], each in 0...1 (1 means the full data length is selected), e.g. [0.15, 0.45]
        custom_cost: cost function, see https://centre-borelli.github.io/ruptures-docs/costs/index.html
        jump: int, subsample (one point every `jump` points), e.g. 5
        pen: float, penalty value (> 0), e.g. 2
        results_show: bool, display the detection result
        title: the image title
        save_path: the path to save the result image
        fig_name: the image name
        ----------------
        RETURN
        ----------------
        delta_R: the variation of the resistance signal before and after the change point
        '''

        ab_R = self.R_data[round(len(self.valley_id) * cut_off[0]
                                 ):round(len(self.valley_id) *
                                         cut_off[1])].values

        c = custom_cost

        # Window-based detection; fit_predict returns the sorted list of breakpoint indexes
        bkps = rpt.Window(width=width, custom_cost=c,
                          jump=jump).fit_predict(ab_R, pen=pen)

        if len(bkps) >= 2:
            delta_R = np.mean(ab_R[:bkps[0]]) - np.mean(ab_R[bkps[0]:])
            if delta_R < 0:
                # delta_R cannot be less than 0 because the resistance curve decreases over time
                delta_R = 0
        else:
            delta_R = 0

        if results_show:
            rpt.display(ab_R, bkps)
            if title is not None:
                plt.title(title)
            if save_path is not None and fig_name is not None:
                save_fig(image_path=save_path, fig_name=fig_name)
            plt.show()

        return delta_R
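
The delta R logic described in the docstring, isolated as a standalone sketch on a plain 1-D array (names and defaults here are illustrative; this is not part of the original class):

import numpy as np
import ruptures as rpt

def delta_r_sketch(resistance, width=40, jump=5, pen=2):
    # window-based detection, then compare the means before and after the first change point
    bkps = rpt.Window(width=width, model='l2', jump=jump).fit_predict(resistance, pen=pen)
    if len(bkps) < 2:  # only the end-of-signal index was returned: no change point found
        return 0.0
    cp = bkps[0]
    delta_r = np.mean(resistance[:cp]) - np.mean(resistance[cp:])
    return max(delta_r, 0.0)  # resistance normally decreases, so negative deltas are treated as 0
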
Example #14

nonlinear2_abrupt_raw = create_simdata.nonlinear3_abrupt()
nonlinear2_abrupt = functions.preprocess_timeseries(nonlinear2_abrupt_raw, windowsize=20)

plt.plot(nonlinear2_abrupt)

lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt)


signal = nonlinear2_abrupt.loc[:,["t", 'pacf1','pacf2', 'pacf3','acf1','acf2', 'acf3', 'acf4', 'acf5',
                                  'var','kurt','skew', 'osc', 'mi_lag1', 'mi_lag2', 'mi_lag3']].to_numpy()

algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:,1:])
my_bkps = algo.predict(pen=18)
fig, (ax,) = rpt.display(signal[:,0], my_bkps, figsize=(10, 6))
plt.show()



lin1_abrupt = create_simdata.linear1_abrupt()
lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt) #cuts out the first 10 observations
signal = lin1_abrupt.loc[:,["t", 'pacf1','pacf2', 'pacf3','acf1','acf2', 'acf3', 'acf4', 'acf5',
                                  'var','kurt','skew', 'osc', 'mi_lag1', 'mi_lag2', 'mi_lag3']].to_numpy()


algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:,1:])
bkps = algo.predict(pen=30)
Example #15
plt.xlabel("RankedCells")
plt.ylabel("Moving Avg Topic Probability")
plt.savefig(os.path.join(sc.settings.figdir, name + "_TopicMovingAvg.png"))
plt.clf()
convolvedSD = moving_average(adata.obs['percent_ribo'].tolist(), 300)
plt.plot(range(len(convolvedSD)), convolvedSD)
plt.title("Moving Average Percent Ribo")
plt.savefig(os.path.join(sc.settings.figdir, name + "_RiboCounts.png"))
plt.clf()
convolvedSD = moving_average(adata.obs['percent_mito'].tolist(), 300)
plt.plot(range(len(convolvedSD)), convolvedSD)
plt.title("Moving Average Percent Mito")
plt.savefig(os.path.join(sc.settings.figdir, name + "_MitoCounts.png"))
plt.clf()
signal = np.column_stack(
    (np.std(doc_topic, axis=1), adata.obs['percent_ribo'].tolist(),
     adata.obs['percent_mito'].tolist(), np.array(list(adata.obs.n_counts))))

algo = rpt.Window(width=2500, model="l1").fit(signal)
result = algo.predict(pen=50)
costs = []
for i in range(len(result) - 1):
    costs.append(algo.cost.sum_of_costs([result[i], result[len(result) - 1]]))
rpt.display(signal=signal, true_chg_pts=result, computed_chg_pts=result)
plt.title(str(np.argmin(costs)) + ' <- Best cPoint')
plt.savefig(os.path.join(sc.settings.figdir, name + "_Changepoints.png"))
plt.clf()
print_top_words(ldaM, adata.var.index, 15)
table_top_words(ldaM, adata.var.index, 25).to_csv(
    os.path.join(sc.settings.figdir, name + "_TopicMarkers.txt"))
Example #16
arr = []

for j in range(X.shape[1]):
    # Excluding masked pixels; they are assigned a negative value
    day = X.loc[X.iloc[:, j] > 0]

    value = day.iloc[:, j].mean()
    if value > 0:
        arr.append(value)

#Changepoint detection with the Pelt search method
signal = np.array(arr)
algo = rpt.Pelt(model="rbf").fit(signal)
result = algo.predict(pen=10)
rpt.display(signal, result)
plt.title('Change Point Detection: Pelt Search Method')
plt.show()

#Changepoint detection with the Binary Segmentation search method
model = "l2"
algo = rpt.Binseg(model=model).fit(signal)
my_bkps = algo.predict(n_bkps=10)
# show results
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Binary Segmentation Search Method')
plt.show()

#Changepoint detection with window-based search method
model = "l2"
algo = rpt.Window(width=40, model=model).fit(signal)
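
The snippet is cut off right after fitting the window-based detector; a hedged completion that mirrors the two methods above (the choice of 10 breakpoints is illustrative):

my_bkps = algo.predict(n_bkps=10)
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()
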
Example #17
                Fos_pred[loopNo] = F[int(idxo_pred)]
                
                if bkps:
                    tbkps = np.zeros(len(bkps)-1)
                    for l in range(0,len(bkps)-1):
                        tbkps[l] = t[bkps[l]-1]
                
                bkpsTrue = bkps
                bkpsCal = bkps
                bkpsCal = np.zeros_like(bkps)
                bkpsCal = np.zeros_like(bkps)
                for x in range(0,len(bkpsTrue)):
                    bkpsTrue[x] = bkps[-1]
                 
                if segPlt > 0:            
                    fig, (ax,) = rpt.display(norm(gradF), bkpsTrue, bkpsCal)
                    fig, (ax,) = rpt.display(F, bkpsTrue, bkpsCal)
                    fig, (ax,) = rpt.display(s, bkpsTrue, bkpsCal)
              
############-Plot
            if showPlt > 0:
                if ruleBased > 0:
                    plt.figure()
                    plt.plot(norm(F[:trim_to]))
                    plt.plot(norm(gradF[:trim_to]))
                    plt.plot(norm(grad2F[:trim_to]))
                    plt.plot(norm(s[:trim_to]))
                    #plt.plot(norm(A[:trim_to]))
                    plt.axvline(idxo, color="red", linestyle = "--")
                    plt.axvline(idxo_pred,color="black", linestyle = "--") 
                else:    
Example #18
https://github.com/deepcharles/ruptures
"""
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import ruptures as rpt
import matplotlib
import pandas as pd
import os
import numpy as np
from _config import Config
matplotlib.use('TkAgg')

if __name__ == '__main__':
    # Read input data from csv
    input_data = pd.read_csv(os.path.join('data', 'input_data.csv'),
                             header=0,
                             index_col=[0],
                             parse_dates=[0])

    print('Shape of input data = ', input_data.shape)

    # Detection with PELT
    algo = rpt.Pelt(model="rbf").fit(input_data)
    result = algo.predict(pen=100)

    print(','.join([str(input_data.index[i - 1]) for i in result]))

    # Display
    rpt.display(input_data, result, figsize=(12, 6))
    plt.show()
Example #19
import yfinance as yf
import matplotlib.pyplot as plt
import ruptures as rpt

msft = yf.Ticker("SPY")

data = yf.download('spy', start='2019-01-01', end='2019-07-11')
data = data.Close.values

algo = rpt.Pelt(model='rbf').fit(data)
result = algo.predict(pen=10)

rpt.display(data, result)
plt.show()

plt.plot(time_keeper, nbr_communities_over_time)
plt.title('Enron Emails Network: Infomap Community Count by Week')
plt.xlabel('Time')
plt.ylabel('Number of Communities')
plt.show()

# Change point detection
weeks_to_ignore = 4

algo = rpt.Pelt(model="rbf").fit(np.array(avg_degree_over_time))
result = algo.predict(pen=4)
print("Change points in diameter: ",
      [time_keeper[i] for i in result[:len(result) - 1]])
rpt.display(np.array(avg_degree_over_time), [weeks_to_ignore], result)
plt.title("Change Points in Avg Degree Centrality Over Time")
plt.tight_layout()
plt.show()

algo = rpt.Pelt(model="rbf").fit(np.array(diameter_over_time))
result = algo.predict(pen=4)
print("Change points in diameter: ",
      [time_keeper[i] for i in result[:len(result) - 1]])
rpt.display(np.array(diameter_over_time), [weeks_to_ignore], result)
plt.title("Change Points in Diameter Over Time")
plt.tight_layout()
plt.show()

algo = rpt.Pelt(model="rbf").fit(np.array(nbr_communities_over_time))
result = algo.predict(pen=4)
Example #21
# data = sio.loadmat('./data/shiftcorr%d'%idx) # poor results
# data = sio.loadmat('./data/shiftlinear%d'%idx) # poor results
# data = sio.loadmat('./data/singledimshiftfreq%d'%idx)# poor results
# data = sio.loadmat('./data/agotsshiftmean%d'%idx)
# data = sio.loadmat('./data/agotsshiftvar%d'%idx)
data = sio.loadmat('./data/extreme%d' % idx)  # good results
ts = data['ts']  #.T # transpose is needed for shiftfreq
print(ts.shape)
bkps = data['bkps'][0]

scaler = StandardScaler()
ts = scaler.fit_transform(ts)

width = 10
step = 5
ts = [ts]
segment = SegmentX(width=width, step=step)
x = segment.fit_transform(ts, None)[0]
x = x.reshape([x.shape[0], -1])
x = torch.from_numpy(x).float()
bkss = bkps // 5  #bkss for break samples

model = AutoEncoder(input_dim=10, latent_dim=1, output_dim=10)

_, pred = model.fit_predict(x)

err = (pred - x).detach().numpy()
err = np.max(np.power(err, 2), axis=1)
rpt.display(err, true_chg_pts=bkss)
rpt.display(ts[0], true_chg_pts=bkps)
plt.show()
Example #22
   

total = result[1] - result[0]
print(A/total)
print(B/total)
print(C/total)
print()

A = 0
B = 0
C = 0

for i in range(result[1], len(choice_sequence)):
    if choice_sequence[i][0] == 1:
        A = A + 1
    if choice_sequence[i][0] == 2:
        B = B + 1
    if choice_sequence[i][0] == 3:
        C = C + 1
    

total = len(choice_sequence) - result[1]
print(A/total)
print(B/total)
print(C/total)

print()


rpt.display(choice_sequence, result)
plt.show()
Example #23
def find_changepoints_for_time_series(series,
                                      modeltype="binary",
                                      number_breakpoints=10,
                                      plot_flag=True,
                                      plot_with_dates=False,
                                      show_time_flag=False):

    #RUPTURES PACKAGE
    #points=np.array(series)
    points = series.values
    title = ""

    t0 = time.time()
    if modeltype == "binary":
        title = "Change Point Detection: Binary Segmentation Search Method"
        model = "l2"
        changepoint_model = rpt.Binseg(model=model).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    if modeltype == "pelt":
        title = "Change Point Detection: Pelt Search Method"
        model = "rbf"
        changepoint_model = rpt.Pelt(model=model).fit(points)
        result = changepoint_model.predict(pen=10)
    if modeltype == "window":
        title = "Change Point Detection: Window-Based Search Method"
        model = "l2"
        changepoint_model = rpt.Window(width=40, model=model).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    if modeltype == "Dynamic":
        title = "Change Point Detection: Dynamic Programming Search Method"
        model = "l1"
        changepoint_model = rpt.Dynp(model=model, min_size=3,
                                     jump=5).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    if modeltype == "online":
        # CHANGEFINDER PACKAGE
        title = "Simulates the working of finding changepoints in online fashion"
        cf = changefinder.ChangeFinder()
        scores = [cf.update(p) for p in points]
        result = (-np.array(scores)).argsort()[:number_breakpoints]
        result = sorted(list(result))
        if series.shape[0] not in result:
            result.append(series.shape[0])

    if show_time_flag:
        elapsed_time = time.time() - t0
        print("[exp msg] elapsed time for process: " +
              str(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))

    if plot_flag:
        if not plot_with_dates:
            rpt.display(points, result, figsize=(18, 6))
            plt.title(title)
            plt.show()
        else:
            series.plot(figsize=(18, 6))
            plt.title(title)
            for i in range(len(result) - 1):
                if i % 2 == 0:
                    current_color = 'xkcd:salmon'
                else:
                    current_color = 'xkcd:sky blue'
                #plt.fill_between(series.index[result[i]:result[i+1]], series.max(), color=current_color, alpha=0.3)
                plt.fill_between(series.index[result[i]:result[i + 1]],
                                 y1=series.max() * 1.1,
                                 y2=series.min() * 0.9,
                                 color=current_color,
                                 alpha=0.3)
            plt.show()

    return (result)
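
A hedged usage sketch, assuming series is a pandas Series (a synthetic daily series with one mean shift; plotting disabled so it runs headless):

import numpy as np
import pandas as pd

idx = pd.date_range('2020-01-01', periods=300, freq='D')
vals = np.concatenate([np.random.normal(0, 1, 150), np.random.normal(4, 1, 150)])
series = pd.Series(vals, index=idx)

result = find_changepoints_for_time_series(series,
                                           modeltype="pelt",
                                           plot_flag=False,
                                           show_time_flag=True)
print(result)  # breakpoint indexes; the last entry equals len(series)
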
Example #24
elif sys_platform == 'mac':
    import matplotlib
    from matplotlib.font_manager import FontProperties

    matplotlib.use('TkAgg')

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

plt.style.use('seaborn')

data_path = u'/Users/longguangbin/Work/Documents/SAAS/安踏线下/季节性/sample/duanku_sales.xls'
data = pd.read_excel(data_path)

import ruptures as rpt

# # generate signal
n_samples, dim, sigma = 1000, 3, 4
n_bkps = 4  # number of breakpoints
signal, bkps = rpt.pw_constant(n_samples, dim, n_bkps, noise_std=sigma)

# detection
# algo = rpt.Pelt(model="rbf").fit(signal[:,1])
algo = rpt.Pelt(model="rbf").fit(data['sku_num_sum'].values)
result = algo.predict(pen=10)

# display
rpt.display(signal, bkps, result)
rpt.display(data['sku_num_sum'].values, result)
plt.show()
Example #25
    def destill_profiles(dst,
                         #                      profile2top = False
                         ):
        # precondition the data

        def profile2top(sectioned):
            sect_tmp = sectioned.copy()
            sect_cponly = sect_tmp[~sect_tmp.changepoints.isna()]
            # sect_tmp.classfied[~sect_tmp.changepoints.isna()] == 0
            # sect_cponly.classfied == 0
            profcounter = 1
            for ts, row in sect_cponly.iterrows():
                if row.profile == -8888:
                    profcounter += 1
                elif np.isnan(row.profile):
                    continue
                else:
                    sect_tmp.loc[ts, 'profile'] = profcounter
            return sect_tmp

        def fillit(sectioned_tmp):
            sectioned_tmp.profile.ffill(inplace=True)
            sectioned_tmp.profile[sectioned_tmp.profile == 0] = np.nan
            sectioned_tmp.profile[sectioned_tmp.profile == -9999] = np.nan
            sectioned_tmp.profile[sectioned_tmp.profile == -8888] = np.nan

        def remove_cp_when_section2short(df, mintimedelta):
            df[~df.changepoints.isna()]
            idx = df[~df.changepoints.isna()].index
            idx_delta = idx[1:] - idx[:-1]
            mintimedelta = pd.to_timedelta(mintimedelta)
            where = idx_delta < mintimedelta
            where = np.append(where[::-1], False)[::-1]
            df.loc[idx[where], 'changepoints'] = np.nan

        alt = dst.altitude.to_dataframe().altitude

        ## remove faulty data points
        alt[alt < ignor_altitudes_smaller_than] = np.nan

        ## smooth the altitude and then take the derivative
        ints = 5

        altsmooth = alt.resample(f'{ints}s').mean()
        div = np.gradient(altsmooth)
        altsmooth = pd.DataFrame(altsmooth)
        altsmooth['deriv'] = div

        if 0:
            a = alt.plot()
            altsmooth.altitude.plot(ax=a)

            at = a.twinx()
            altsmooth.deriv.plot(ax=at, color=colors[2])

    # ruptures

    ## make the points for the change point analysis
        altsmooth.interpolate(inplace=True)
        altsmooth.dropna(inplace=True)
        points = altsmooth.deriv.values

        ## do the change point analysis
        model = "rbf"
        algo = rpt.Pelt(model=model).fit(points)
        result = algo.predict(pen=pen)

        ## plot results
        # %matplotlib inline
        # plt.rcParams['figure.dpi']=200

        if 0:
            f, aa = rpt.display(points, result, figsize=(10, 6))
            for a in aa:
                at = a.twinx()
                at.plot(altsmooth.altitude.values, color=colors[1])

    # convert the change point info into profiles
        sectioned = pd.DataFrame(index=dst.datetime.to_pandas().index,
                                 columns=[
                                     'changepoints', 'slope', 'classfied',
                                     '_alt_avg', '_alt_at_cp', '_alt_min',
                                     'profile'
                                 ])

        ## mark change points
        sectioned.changepoints.loc[altsmooth.iloc[result[:-1]].index] = 1
        sectioned.changepoints.iloc[-1] = 1

        ## remove change points where section is too short
        remove_cp_when_section2short(sectioned, mintimedelta)

        ## add additional parameters to sections between change points
        startime = sectioned.index[0]
        for ts, cp in sectioned.changepoints.dropna().iteritems():
            endtime = ts

            sect = alt.loc[startime:endtime]
            df = pd.DataFrame(sect)
            df['s'] = range(df.shape[0])
            dfwn = df.copy()  # a copy with all the nans still in it
            df.dropna(inplace=True)

            if df.shape[0] == 0:
                continue
            res = sp.stats.linregress(df.s, df.altitude)
            sectioned.slope.loc[dfwn.index] = res.slope
            sectioned._alt_avg.loc[dfwn.index] = sect.mean()
            sectioned._alt_min.loc[dfwn.index] = sect.min()
            sectioned._alt_at_cp.loc[ts] = sect.dropna().iloc[-1]
            startime = ts

    ## classify sections between change points
        ground = 0
        park = 1
        up = 2
        down = 3
        sectioned.classfied[
            sectioned.slope.abs() < threshold_park_slope] = park
        sectioned.classfied[sectioned.slope >= threshold_park_slope] = up
        sectioned.classfied[sectioned.slope < -threshold_park_slope] = down

        where = np.logical_and(
            sectioned.classfied == park,
            sectioned._alt_min.interpolate() < threshold_ground_alt)
        sectioned.classfied[where] = ground

        ## combine sections into profiles
        profile_id = 0
        cp1 = 0
        cp2 = 0
        cp3 = 0
        uod = lambda x: x if row.classfied == 2 else -x
        # profile_id = iter(range(100))
        inside = False

        sect_cponly = sectioned[~sectioned.changepoints.isna()]
        for ts, row in sect_cponly.iterrows():
            # for ts, row in sectioned.iterrows():
            try:
                #         print('works')
                row_next = sect_cponly.iloc[sect_cponly.index.get_loc(ts) + 1]
                # ts_next = row_next.name
            except IndexError:
                #         print('doese')
                # this happens if we are at the end of the list
                row_next = row.copy()
                row_next.name = pd.to_datetime('2200-01-01 00:00:00')
                row_next.classfied = -9999

            try:
                row_next_next = sect_cponly.iloc[
                    sect_cponly.index.get_loc(row_next.name) + 1]
                # ts_next_next = row_next_next.name
            except:
                row_next_next = row.copy()
                row_next_next.name = pd.to_datetime('2200-01-01 00:00:00')
                row_next_next.classfied = -9999
                # row_next_next = False
                # ts_next_next = pd.to_timedelta('2200-01-01 00:00:00')

            #     if ts.__str__() == '2017-05-23 17:55:45':
            #         break
            if not inside:
                if row.classfied == ground:
                    continue
                elif row.classfied == park:
                    continue
                elif pd.isna(row.classfied):
                    continue
                elif row.classfied in [up, down]:
                    #                 cp1 += 1
                    profile_id += 1
                    sectioned.loc[ts, 'profile'] = uod(profile_id)
                    inside = row.classfied
                    time_at_classified = ts
                else:
                    assert (False)  # should not be possible
            if inside in [up, down]:
                if row.classfied == ground:
                    sectioned.loc[ts, 'profile'] = -8888
                    inside = False
                elif row.classfied == inside:
                    sectioned.loc[ts, 'profile'] = uod(profile_id)
                    time_at_classified = ts  # This is the time at which a valid change is happening

                # elif not isinstance(row_next_next, bool) :
                elif ((row_next.name - time_at_classified) <
                      pd.to_timedelta(mintime2interrupt_profile)
                      ) and row_next_next.classfied == inside:
                    # if not isinstance(row_next_next, bool):
                    # if row_next_next.classfied == inside:
                    sectioned.loc[ts, 'profile'] = np.nan

                elif row.classfied == park:
                    # if row_next.classfied == inside:
                    #     sectioned.loc[ts, 'profile'] = np.nan  # uod(profile_id)
                    # if (row_next.name - time_at_classified) < pd.to_timedelta(mintime2interrupt_profile):
                    #     sectioned.loc[ts, 'profile'] = np.nan
                    # else:
                    sectioned.loc[ts, 'profile'] = 0
                    inside = False
                #             continue

                elif pd.isna(row.classfied):
                    #                 cp2 += 1
                    inside = False
                    sectioned.loc[ts, 'profile'] = -9999
                    continue
                elif row.classfied != inside:
                    #                 cp3 += 1
                    # if (row_next.name - time_at_classified) < pd.to_timedelta(mintime2interrupt_profile):
                    #     sectioned.loc[ts, 'profile'] = np.nan
                    # else:
                    profile_id += 1
                    sectioned.loc[ts, 'profile'] = uod(profile_id)
                    inside = row.classfied
                else:
                    assert (False)  # noep
            else:
                assert (False)  # should not be possible

    ## combine even further if profiles comprise everything up to the top and then down again

        sectioned_coarse = profile2top(sectioned)

        ## fill all the gaps in between the change points

        fillit(sectioned)
        fillit(sectioned_coarse)

        ## continuation of global profiles (see above); distinguish between up and down
        for profcounter in sectioned_coarse.profile.dropna().unique():
            altsec = alt[sectioned_coarse.profile == profcounter]
            tmax = altsec.idxmax()
            sectioned_coarse.profile[np.logical_and(
                sectioned_coarse.profile == profcounter,
                sectioned_coarse.index > tmax)] = -profcounter

        #     break

    ## test if profile reaches the ground and remove if not

        def test_if_grounded(sectioned_tmp):
            minaltsofprofs = alt.groupby(sectioned_tmp.profile).min()

            for prof, minalt in minaltsofprofs[
                    minaltsofprofs > threshold_ground_alt].iteritems():
                sectioned_tmp.profile[sectioned_tmp.profile == prof] = np.nan

        if has2be_connected2ground:
            test_if_grounded(sectioned)
            test_if_grounded(sectioned_coarse)

    ## remove sections that are too short or do not have enough elevation difference

        def remove_profiles_2short_or_heigh(sectioned):
            for pf in sectioned.profile.dropna().unique():
                altsect = alt[sectioned.profile == pf]
                dt = altsect.index[-1] - altsect.index[0]
                dalt = abs(altsect.max() - altsect.min())
                if (dt < pd.to_timedelta(minprofile_duration)
                    ) or dalt < minprofile_altdiff:
                    sectioned.profile[sectioned.profile == pf] = np.nan

        remove_profiles_2short_or_heigh(sectioned)
        remove_profiles_2short_or_heigh(sectioned_coarse)

        ## if one was deleted we have to shift all successive ones to avoid confusion
        while 1:
            profnos = abs(sectioned.profile.dropna().unique())
            diff = profnos[1:] - profnos[:-1]
            if len(np.unique(diff)) > 1:
                idx = diff.argmax() + 1
            else:
                break

            for pn in profnos[idx:]:
                where = sectioned.profile.abs() == pn

                if sectioned.profile[where].unique()[0] > 0:
                    fct = 1
                elif sectioned.profile[where].unique()[0] < 0:
                    fct = -1
                sectioned.profile[where] = fct * (pn - 1)

        ## plot result
        #     if 1:
        #         # %matplotlib inline
        #         # plt.rcParams['figure.dpi']=200

        #         a = sectioned.profile.plot()
        #         at = a.twinx()
        #         alt.plot(ax = at, color = colors[1])
        #         for idx in sect_cponly.index:
        #             a.axvline(idx, color = colors[2], lw = 0.5, ls = '--')

        out = {}
        out['sectioned_deteiled'] = sectioned
        out['sectioned_coarse'] = sectioned_coarse
        return out
Example #26
def make_neighborhood_rank_divergence_plot(rank_df, adj_df):
    rank_df.sort_values('rank', inplace=True, ascending=True)

    divergences = np.zeros(len(rank_df.index))
    for i, (county, rank) in enumerate(zip(rank_df['County'],
                                           rank_df['rank'])):
        neighbors = adj_df.loc[adj_df.source == county, 'destination']

        if len(neighbors) == 0:
            neighbors = adj_df.loc[adj_df.destination == county, 'source']

        rank_ind = rank_df.County.isin(neighbors).values
        neighbor_ranks = rank_df.loc[rank_ind, 'rank']
        divergence = np.abs(rank - neighbor_ranks).mean()
        divergences[i] = divergence

        if np.isnan(divergence):
            print(county)
            print(neighbors)
            print(neighbor_ranks)

    rank_df['rank_div'] = divergences

    # Change point detection
    signal = rank_df['rank_div'].rolling(100).mean().dropna().values
    # model = {'l1', 'l2', 'rbf', 'linear', 'normal', 'ar'}
    pelt_bkps = rpt.Pelt(model='rbf').fit(signal).predict(pen=100)
    window_bkps = rpt.Window(width=1000,
                             model='l2').fit(signal).predict(n_bkps=1)
    bin_bkps = rpt.Binseg(model='l2').fit(signal).predict(n_bkps=1)
    ensemble_bkp = np.mean(
        [*pelt_bkps[:-1], *window_bkps[:-1], *bin_bkps[:-1]])

    print('Identified Breakpoints:'
          f'\n\tPelt Breakpoints:    {pelt_bkps[:-1]}'
          f'\n\tWindow Breakpoints:  {window_bkps[:-1]}'
          f'\n\tBinary Breakpoints:  {bin_bkps[:-1]}'
          f'\n\tEnsemble Breakpoint: {ensemble_bkp}')

    plt.scatter(
        rank_df['rank'].values,
        rank_df['rank_div'].values,
        facecolor='None',
        edgecolor=sns.xkcd_rgb['denim blue'],
        linewidth=2,
        label='Data',
    )
    plt.plot(
        rank_df['rank'].values,
        rank_df['rank_div'].rolling(100).mean(),
        color='darkorange',
        label='Rolling Mean',
    )

    y_min, y_max = divergences.min(), divergences.max()
    y_range = y_max - y_min
    plt.plot([ensemble_bkp, ensemble_bkp],
             [y_min - 0.1 * y_range, y_max + 0.1 * y_range],
             'k--',
             label='Estimated Breakpoint')
    plt.legend()
    plt.title('Mean Neighborhood Rank Divergence')
    plt.xlabel('Quality of Life Rank (Lower is better)')
    plt.ylabel('Rank Divergence')
    plt.tight_layout()
    ymin, ymax = plt.gca().get_ylim()
    figsize = plt.gcf().get_size_inches()
    plt.savefig('../output/neighborhood_rank_divergence.png', dpi=600)
    plt.close('all')

    # Visualize change points
    bkps = []
    rpt.display(
        signal,
        bkps,
        pelt_bkps,
        figsize=figsize,
    )
    plt.ylim(ymin, ymax)
    plt.gca().get_lines()[0].set_color('darkorange')
    plt.title('Pelt Change Point Detection')
    plt.xlabel('Quality of Life Rank')
    plt.ylabel('Local Rank Divergence')
    plt.tight_layout()
    plt.savefig('../output/rank_div_change_point_pelt.png', dpi=600)
    plt.close('all')

    rpt.show.display(
        signal,
        bkps,
        window_bkps,
        figsize=figsize,
    )
    plt.ylim(ymin, ymax)
    plt.gca().get_lines()[0].set_color('darkorange')
    plt.title('Window Change Point Detection')
    plt.xlabel('Quality of Life Rank')
    plt.ylabel('Local Rank Divergence')
    plt.tight_layout()
    plt.savefig('../output/rank_div_change_point_window.png', dpi=600)
    plt.close('all')

    rpt.show.display(
        signal,
        bkps,
        bin_bkps,
        figsize=figsize,
    )
    plt.ylim(ymin, ymax)
    plt.gca().get_lines()[0].set_color('darkorange')
    plt.title('Binary Change Point Detection')
    plt.xlabel('Quality of Life Rank')
    plt.ylabel('Local Rank Divergence')
    plt.tight_layout()
    plt.savefig('../output/rank_div_change_point_binary.png', dpi=600)
    plt.close('all')
Example #27
def windows(series, window_size=20, pen=2):
    algo = rpt.Window(width=window_size, model="l2").fit(series)
    result = algo.predict(pen=pen)
    rpt.display(series, result)
    plt.show()
    return result
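
A hedged usage example for windows on a synthetic signal (the width and penalty values are illustrative):

import numpy as np

# piecewise-constant series with two level shifts plus noise
series = np.concatenate([np.zeros(100), np.ones(100) * 3, np.zeros(100)]) + np.random.normal(0, 0.5, 300)
breaks = windows(series, window_size=20, pen=5)
print(breaks)  # breakpoint indexes; the last entry is len(series)
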
Example #28
            except Exception as e:
                print('failed to plot persist anomalies for %s - %s' % (base_name, e))

    timer_start_ruptures = timer()
    for base_name in found_level_shift_and_persists:
        timeseries = more_analysis_metrics_timeseries[base_name]['timeseries']
        if not timeseries:
            print('failed to find timeseries for %s' % base_name)
            continue
        working_timeseries_timestamps = [int(ts) for ts, value in timeseries]
        working_timeseries_values = [v for ts, v in timeseries]
        working_values = np.array(working_timeseries_values)
        algo_c = rpt.KernelCPD(kernel='linear', min_size=12).fit(working_values)  # written in C
        results = algo_c.predict(pen=2)
        values = working_values
        rpt.display(values, results, figsize=(18, 6))
        title = '%s' % base_name
        plt.title(title)
        plt.show()
    timer_end_ruptures = timer()
    print('%s metrics analysed with ruptures, took %.6f seconds' % (
        str(len(found_level_shift_and_persists)),
        (timer_end_ruptures - timer_start_ruptures)))


# @added 20210726 - Info #4198: ppscore
# ppscore is the best cloudburst candidate algorithm found to date

####
    return cloudbursts_found, plot_images
    #Format the 'Date' column
    price_df['Date'] = price_df['Date'].astype(str).str[:-3]
    #Convert the Date column into a date object
    price_df['Date'] = pd.to_datetime(price_df['Date'], format='%Y %m%d')
    #Subset to only include data going back to 2014
    price_df = price_df[(price_df['Date'] >= '2014-01-01')]

    #Convert the time series values to a numpy 1D array
    points = np.array(price_df['WTI_Price'])

    #RUPTURES PACKAGE
    #Changepoint detection with the Pelt search method
    model = "rbf"
    algo = rpt.Pelt(model=model).fit(points)
    result = algo.predict(pen=10)
    rpt.display(points, result, figsize=(10, 6))
    plt.title('Change Point Detection: Pelt Search Method')
    plt.show()

    #Changepoint detection with the Binary Segmentation search method
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    # show results
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.show()

    #Changepoint detection with window-based search method
    model = "l2"
    algo = rpt.Window(width=40, model=model).fit(points)
Example #30
    def show(self):
        rpt.display(self.signal, self.breakpoints())
        plt.show()
Example #31
def main():
	USER_ID, CLIENT_SECRET, server = instantiate_server()
	ACCESS_TOKEN, REFRESH_TOKEN = get_access_token(server), get_refresh_token(server)
	auth_client = get_auth_client(USER_ID, CLIENT_SECRET, ACCESS_TOKEN, REFRESH_TOKEN)

	user_id = get_fitbit_user_id(get_user_information(server))
	date_str = '2019-02-14'
	heart_data = get_heart_intraday(get_heart_data(auth_client, date_str), user_id)
	# print(heart_data)
	calories_data = get_calories_intraday(get_calories_data(auth_client, date_str), user_id)
	# print(calories_data)
	steps_data = get_steps_intraday(get_steps_data(auth_client, date_str), user_id)
	# print(steps_data)

	heart_beat_vals = np.array([minute_heart['value'] for minute_heart in heart_data])
	heart_beat_vals = heart_beat_vals - np.mean(heart_beat_vals)
	heart_beat_vals = heart_beat_vals / np.std(heart_beat_vals)
	calories_vals = np.array([minute_calories['value'] for minute_calories in calories_data])
	calories_vals = calories_vals - np.mean(calories_vals)
	calories_vals = calories_vals / np.std(calories_vals)
	activity_vals = np.array([minute_calories['level'] for minute_calories in calories_data])
	#activity_vals = activity_vals - np.mean(activity_vals)
	#activity_vals = activity_vals / np.std(activity_vals)
	steps_vals = np.array([minute_steps['value'] for minute_steps in steps_data])
	steps_vals = steps_vals - np.mean(steps_vals)
	steps_vals = steps_vals / np.std(steps_vals)

	all_vals = np.array(list(zip(heart_beat_vals, calories_vals, steps_vals))).reshape(-1, 3)
	print(all_vals.shape)

	model = 'l2'
	min_size_val = 1

	algo = rpt.Pelt(model=model, min_size=min_size_val).fit(heart_beat_vals)
	result = algo.predict(pen=10)
	rpt.display(heart_beat_vals, result)
	plt.gcf().axes[0].set_title(f'Heart Beat: model={model} min_size={min_size_val}')
	plt.savefig(f'../data/plots/changepoint/heart_model={model}_min_size={min_size_val}.png')
	plt.gcf().axes[0].plot(activity_vals, 'r')
	plt.savefig(f'../data/plots/changepoint/Activity_Overlap_heart_model={model}_min_size={min_size_val}.png')
	# plt.show()

	algo = rpt.Pelt(model=model, min_size=min_size_val).fit(calories_vals)
	result = algo.predict(pen=10)
	rpt.display(calories_vals, result)
	plt.gcf().axes[0].set_title(f'Calories: model={model} min_size={min_size_val}')
	plt.savefig(f'../data/plots/changepoint/calories_model={model}_min_size={min_size_val}.png')
	plt.gcf().axes[0].plot(activity_vals, 'r')
	plt.savefig(f'../data/plots/changepoint/Activity_Overlap_calories_model={model}_min_size={min_size_val}.png')
	# plt.show()

	algo = rpt.Pelt(model=model, min_size=min_size_val).fit(steps_vals)
	result = algo.predict(pen=10)
	rpt.display(steps_vals, result)
	plt.gcf().axes[0].set_title(f'Steps: model={model} min_size={min_size_val}')
	plt.savefig(f'../data/plots/changepoint/steps_model={model}_min_size={min_size_val}.png')
	plt.gcf().axes[0].plot(activity_vals, 'r')
	plt.savefig(f'../data/plots/changepoint/Activity_Overlap_steps_model={model}_min_size={min_size_val}.png')
	# plt.show()

	algo = rpt.Pelt(model=model, min_size=min_size_val).fit(activity_vals)
	result = algo.predict(pen=10)
	rpt.display(activity_vals, result)
	plt.gcf().axes[0].set_title(f'Activity: model={model} min_size={min_size_val}')
	plt.savefig(f'../data/plots/changepoint/activity_model={model}_min_size={min_size_val}.png')
	plt.gcf().axes[0].plot(activity_vals, 'r')
	plt.savefig(f'../data/plots/changepoint/Activity_Overlap_activity_model={model}_min_size={min_size_val}.png')
	# plt.show()

	algo = rpt.Pelt(model=model, min_size=min_size_val).fit(all_vals)
	result = algo.predict(pen=10)
	rpt.display(all_vals, result)
	plt.gcf().axes[0].set_title(f'Heart Rate: model={model} min_size={min_size_val}')
	plt.gcf().axes[1].set_title(f'Calories: model={model} min_size={min_size_val}')
	plt.gcf().axes[2].set_title(f'Steps: model={model} min_size={min_size_val}')
	plt.savefig(f'../data/plots/changepoint/all_model={model}_min_size={min_size_val}.png')
	plt.gcf().axes[0].plot(activity_vals, 'r')
	plt.gcf().axes[1].plot(activity_vals, 'r')
	plt.gcf().axes[2].plot(activity_vals, 'r')
	plt.savefig(f'../data/plots/changepoint/Activity-Overlap Data-Date-{date_str} All-Attr-Model={model} Min-Size={min_size_val}.png')
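
The five fit/predict/display/save blocks above repeat the same pattern; a hedged refactoring sketch that wraps it in one helper (assuming the module-level rpt and plt imports used above; the helper name and defaults are illustrative):

def detect_and_plot(values, name, model='l2', min_size_val=1, pen=10, overlay=None):
    # fit PELT, plot the detected change points, and save the figure
    algo = rpt.Pelt(model=model, min_size=min_size_val).fit(values)
    result = algo.predict(pen=pen)
    rpt.display(values, result)
    plt.gcf().axes[0].set_title(f'{name}: model={model} min_size={min_size_val}')
    plt.savefig(f'../data/plots/changepoint/{name}_model={model}_min_size={min_size_val}.png')
    if overlay is not None:
        plt.gcf().axes[0].plot(overlay, 'r')
        plt.savefig(f'../data/plots/changepoint/Activity_Overlap_{name}_model={model}_min_size={min_size_val}.png')
    return result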