Example #1
#data = load_data('chlorine')

# Inserted anomaly
#data[1500:2000, 0:4] = 0.7
#data = data[:, :10]

#data = D['data']

# Miss out low-valued time series
#mask = data.mean(axis=0) > 50
#data = data[:, mask]

#data = load_ts_data('isp_routers', 'full')

# Z-score the data
data = zscore(data)
#data = zscore_win(data, 250)  # windowed z-score (sketched below)
# Fix NaNs
whereAreNaNs = np.isnan(data)
data[whereAreNaNs] = 0

# Old rank adaptation - thresholds
e_high = 0.99
e_low = 0.90

alpha = 0.98
# New rank adaptation - EWMA
F_min = 0.92
epsilon = 0.02

EW_mean_alpha = 0.1  # for incremental mean
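
The commented-out zscore_win(data, 250) call refers to a windowed z-score helper that is not shown in this example. The following is only a minimal sketch of what such a helper might look like, assuming it normalises each sample against a trailing window of the given length; it is not the project's own implementation.

import numpy as np

def zscore_win(data, win):
    # Sliding-window z-score (hypothetical sketch of the helper referenced
    # above): normalise each sample against the preceding `win` samples of
    # its own stream.
    out = np.zeros_like(data, dtype=float)
    for t in range(data.shape[0]):
        lo = max(0, t - win + 1)
        seg = data[lo:t + 1]
        mu = seg.mean(axis=0)
        sd = seg.std(axis=0)
        sd[sd == 0] = 1.0  # guard against flat windows
        out[t] = (data[t] - mu) / sd
    return out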
Example #2
import string

import numpy as np

# zscore (z-score normalisation) and bp_lookup (SAX breakpoint table) are
# assumed to be provided elsewhere in the module this example was taken from.


def SAX(data, alphabet_size, word_size, minstd=1.0, pre_normed=False):
    """Return one SAX word for each data stream.

    word_size == number of segments the data is split into for PAA
    alphabet_size == number of symbols used
    """
    num_streams = data.shape[1]

    # Skip near-stationary streams: anything with std below minstd is masked out.
    mask = data.std(axis=0) < minstd
    passed = np.invert(mask)
    if np.any(mask):
        # Scale the remaining streams to zero mean and unit standard deviation.
        if not pre_normed:
            data[:, passed] = zscore(data[:, passed])
        # Symbol assigned to the skipped (near-constant) streams.
        symbol4skips = string.ascii_letters[int(np.ceil(alphabet_size / 2.))]
    else:
        # Scale data to zero mean and unit standard deviation.
        if not pre_normed:
            data = zscore(data)

    # Calculate the breakpoint locations for this alphabet size
    # (a possible bp_lookup is sketched after this example).
    breakpoints = bp_lookup(alphabet_size)
    breakpoints = np.concatenate((breakpoints, np.array([np.inf])))

    # Split the data into a list of word_size pieces along the time axis.
    data = np.array_split(data, word_size, axis=0)

    # Preallocate arrays: per-segment means, plus a structured array with one
    # single-character field per segment and an int16 frequency count.
    segment_means = np.zeros((word_size, num_streams))
    #segment_symbol = np.zeros((word_size, num_streams), dtype=str)
    p_array = np.zeros((num_streams, ), dtype=('U1,' * word_size + 'i2'))
    p_dict = {}

    # Calculate the mean of each segment (PAA) for the streams that passed.
    for i in range(word_size):
        segment_means[i, passed] = data[i][:, passed].mean(axis=0)

    # Figure out which bin each segment falls in, based on segment_means and
    # the calculated breakpoints.
    for i in range(num_streams):
        for j in range(word_size):
            if passed[i]:
                idx = int(np.where(breakpoints > segment_means[j, i])[0][0])
                # Store in phrase array
                p_array[i][j] = string.ascii_letters[idx]
            else:
                p_array[i][j] = symbol4skips

        # Store in phrase dict: word -> list of stream indices sharing it
        phrase = ''.join(tuple(p_array[i])[:word_size])
        if phrase in p_dict:
            p_dict[phrase].append(i)
        else:
            p_dict[phrase] = [i]

    # Put the frequency of each pattern in the last field of p_array.
    for vals in p_dict.values():
        count = len(vals)
        for i in range(count):
            p_array[vals[i]][-1] = count

    return p_array, p_dict, segment_means
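
SAX looks up its breakpoints through bp_lookup(alphabet_size), which is not included in this example. In the standard SAX scheme the breakpoints cut a standard normal distribution into alphabet_size equiprobable regions; the sketch below shows one plausible implementation using scipy.stats.norm, not the project's own code.

import numpy as np
from scipy.stats import norm

def bp_lookup(alphabet_size):
    # Breakpoints splitting N(0, 1) into `alphabet_size` equiprobable regions
    # (sketch of the helper assumed by SAX above). For alphabet_size == 4 this
    # gives approximately [-0.674, 0.0, 0.674].
    probs = np.arange(1, alphabet_size) / float(alphabet_size)
    return norm.ppf(probs)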
Example #3
        
# Inserted anomaly
data[1500:2000, 10:20] = 0.0
#data = data[:, :20]

#data = D['data']

# Miss out low-valued time series
#mask = data.mean(axis=0) > 50
#data = data[:, mask]

#data = load_ts_data('isp_routers', 'mid')

# Z-score the data
data = zscore(data)
#data = zscore_win(data, 250)
# Fix NaNs
whereAreNaNs = np.isnan(data)
data[whereAreNaNs] = 0

# Old rank adaptation - thresholds
e_high = 0.99
e_low = 0.94

alpha = 0.96
# New rank adaptation - EWMA
F_min = 0.9
epsilon = 0.05

EW_mean_alpha = 0.1  # for incremental mean (EWMA update sketched below)
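
EW_mean_alpha is described only as being "for incremental mean". Assuming it is the weight of an exponentially weighted (incremental) mean, the update would look like the sketch below; ew_mean_update is a hypothetical name introduced purely for illustration.

def ew_mean_update(prev_mean, x, EW_mean_alpha=0.1):
    # EWMA update: higher EW_mean_alpha tracks new values faster,
    # lower values smooth more heavily.
    return EW_mean_alpha * x + (1.0 - EW_mean_alpha) * prev_mean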
Example #4
import string

import numpy as np

# zscore and bp_lookup are assumed to be defined elsewhere in this module;
# a usage sketch follows this example.


def SAX(data, alphabet_size, word_size, minstd=1.0, pre_normed=False):
  """Return one SAX word for each data stream.

  word_size == number of segments the data is split into for PAA
  alphabet_size == number of symbols used
  """
  num_streams = data.shape[1]

  # Skip near-stationary streams: anything with std below minstd is masked out.
  mask = data.std(axis=0) < minstd
  passed = np.invert(mask)
  if np.any(mask):
    # Scale the remaining streams to zero mean and unit standard deviation.
    if not pre_normed:
      data[:, passed] = zscore(data[:, passed])
    # Symbol assigned to the skipped (near-constant) streams.
    symbol4skips = string.ascii_letters[int(np.ceil(alphabet_size / 2.))]
  else:
    # Scale data to zero mean and unit standard deviation.
    if not pre_normed:
      data = zscore(data)

  # Calculate the breakpoint locations for this alphabet size.
  breakpoints = bp_lookup(alphabet_size)
  breakpoints = np.concatenate((breakpoints, np.array([np.inf])))

  # Split the data into a list of word_size pieces along the time axis.
  data = np.array_split(data, word_size, axis=0)

  # Preallocate arrays: per-segment means, plus a structured array with one
  # single-character field per segment and an int16 frequency count.
  segment_means = np.zeros((word_size, num_streams))
  #segment_symbol = np.zeros((word_size, num_streams), dtype=str)
  p_array = np.zeros((num_streams,), dtype=('U1,' * word_size + 'i2'))
  p_dict = {}

  # Calculate the mean of each segment (PAA) for the streams that passed.
  for i in range(word_size):
    segment_means[i, passed] = data[i][:, passed].mean(axis=0)

  # Figure out which bin each segment falls in, based on segment_means and
  # the calculated breakpoints.
  for i in range(num_streams):
    for j in range(word_size):
      if passed[i]:
        idx = int(np.where(breakpoints > segment_means[j, i])[0][0])
        # Store in phrase array
        p_array[i][j] = string.ascii_letters[idx]
      else:
        p_array[i][j] = symbol4skips

    # Store in phrase dict: word -> list of stream indices sharing it
    phrase = ''.join(tuple(p_array[i])[:word_size])
    if phrase in p_dict:
      p_dict[phrase].append(i)
    else:
      p_dict[phrase] = [i]

  # Put the frequency of each pattern in the last field of p_array.
  for vals in p_dict.values():
    count = len(vals)
    for i in range(count):
      p_array[vals[i]][-1] = count

  return p_array, p_dict, segment_means
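
A small usage sketch for the function above, run on synthetic random-walk streams to show the expected outputs; it assumes zscore and a bp_lookup such as the one sketched under Example #2 are available in the same module.

import numpy as np

rng = np.random.default_rng(0)
streams = rng.normal(size=(500, 6)).cumsum(axis=0)  # 6 synthetic streams

p_array, p_dict, segment_means = SAX(streams, alphabet_size=8, word_size=10)

for word, members in p_dict.items():
    print(word, '->', members)  # SAX word and the streams that share it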