def get_anomalies(path, n_states, ratio):
    """Flag anomalous intervals in the time series stored at *path*.

    The series is bucketized, decoded with an HMM, and the rarest HMM
    states (covering at most ``ratio`` of the points, never the last
    histogram entry) are flagged.  Maximal runs of flagged buckets are
    converted back to (start_time, end_time) pairs taken from the
    original series.

    path     -- location of the time-series file (read by read_timeseries)
    n_states -- number of hidden states for the HMM
    ratio    -- maximum fraction of points to flag as anomalous

    Returns a list of (start_time, end_time) tuples.
    """
    series = read_timeseries(path)
    factor = 10
    # '//' keeps the original integer-division semantics: this code was
    # written for Python 2, where '/' on two ints already floored.
    new_series = bucketize(series, len(series) // factor)
    v = [int(x) for x in run_hmm(new_series, n_states)]
    hist = get_histogram(v)  # assumed sorted by frequency -- TODO confirm

    # Flag the rarest states until 'ratio' of the points are covered;
    # the final (presumably most common) histogram entry is never flagged.
    n_flagged = 0
    flagged_values = set()
    for index, value in enumerate(hist):
        if n_flagged < len(v) * ratio and index != len(hist) - 1:
            n_flagged += value
            flagged_values.add(index + 1)

    # Collect maximal runs of flagged buckets as index pairs into the
    # original (un-bucketized) series.
    curr_flagged = 0
    intervals = []
    for index, value in enumerate(v):
        if value in flagged_values and curr_flagged == 0:
            start_int = index * factor
            curr_flagged = 1
        if value not in flagged_values and curr_flagged == 1:
            intervals.append((start_int, index * factor))
            curr_flagged = 0
    if curr_flagged == 1:
        # BUGFIX: clamp the trailing interval to the length of the
        # ORIGINAL series.  The old code clamped to len(v) - 1 (the
        # bucket count), which made the end index smaller than the
        # start index for any run reaching the end of a long series.
        intervals.append((start_int, min(index * factor, len(series) - 1)))

    # Convert index intervals into time-stamp intervals.
    return [(series[x][0], series[y][0]) for x, y in intervals]
def get_anomalies(path, n_states, ratio):
    """Locate anomalous time intervals in the series stored at *path*.

    Buckets the series, decodes it with an HMM, flags the rarest HMM
    states (covering at most ``ratio`` of the points, excluding the last
    histogram entry) and returns the flagged runs as
    (start_time, end_time) pairs.
    """
    series = read_timeseries(path)
    factor = 10
    states = run_hmm(bucketize(series, len(series) / factor), n_states)
    states = [int(s) for s in states]
    hist = get_histogram(states)  # sorted histogram

    # Pick the rarest states until 'ratio' of all points is covered;
    # the last histogram entry is always kept as "normal".
    flagged = []
    covered = 0
    last = len(hist) - 1
    for idx, count in enumerate(hist):
        if covered < len(states) * ratio and idx != last:
            covered += count
            flagged.append(idx + 1)

    # Turn runs of flagged buckets into index intervals.
    intervals = []
    run_start = None
    in_run = False
    for idx, state in enumerate(states):
        hit = state in flagged
        if hit and not in_run:
            run_start = idx * factor
            in_run = True
        elif not hit and in_run:
            intervals.append((run_start, idx * factor))
            in_run = False
    if in_run:
        intervals.append((run_start, min(idx * factor, len(states) - 1)))

    # Map index intervals onto the series' time stamps.
    return [(series[a][0], series[b][0]) for a, b in intervals]
def get_anomalies(path, n_states, ratio, feature_func=None, window_size=15):
    """DEPRECATED -- HMM-likelihood anomaly detection for the series at
    *path*.  ``feature_func`` and ``window_size`` are accepted for the old
    windowed-feature path but are currently unused.
    """
    times, values = read_lists(path)
    factor = 15
    # TODO: add slope functionality (windowed features were disabled here).
    buckets = bucketize(times, values, len(values) / factor)
    scores = get_likelihoods(buckets, n_states)
    return likelihoods_to_anomalies(times, scores, ratio, factor)
def get_anomalies(path, n_states, ratio, feature_func=None, window_size=15):
    """DEPRECATED: HMM-likelihood anomaly detection on a bucketized series.

    path         -- location of the time-series file (read by read_lists)
    n_states     -- number of hidden states for the HMM
    ratio        -- fraction of points to flag as anomalous
    feature_func -- unused (kept for the disabled windowed-feature path)
    window_size  -- unused (kept for the disabled windowed-feature path)

    Returns whatever likelihoods_to_anomalies produces (interval list).
    """
    times, values = read_lists(path)
    factor = 15
    # Explicit floor division: under Python 3 (or with
    # 'from __future__ import division') 'len(values) / factor' would be a
    # float, which is not a valid bucket count.  In Python 2 this is
    # byte-identical behavior.
    new_series = bucketize(times, values, len(values) // factor)
    likelihoods = get_likelihoods(new_series, n_states)
    return likelihoods_to_anomalies(times, likelihoods, ratio, factor)
def get_anomalies_from_series(times, values, n_states, ratio):
    """DEPRECATED -- HMM-likelihood anomalies for an in-memory series."""
    factor = 50
    buckets = bucketize(times, values, len(values) / factor)
    return likelihoods_to_anomalies(
        times, get_likelihoods(buckets, n_states), ratio, factor)
def get_anomalies_from_series(times, values, n_states, ratio):
    """DEPRECATED: HMM-likelihood anomaly detection on an in-memory series.

    times    -- time stamps of the series
    values   -- series values, parallel to *times*
    n_states -- number of hidden states for the HMM
    ratio    -- fraction of points to flag as anomalous

    Returns whatever likelihoods_to_anomalies produces (interval list).
    """
    factor = 50
    # Explicit floor division keeps the bucket count an int on Python 3
    # while being byte-identical behavior on Python 2.
    new_values = bucketize(times, values, len(values) // factor)
    likelihoods = get_likelihoods(new_values, n_states)
    return likelihoods_to_anomalies(times, likelihoods, ratio, factor)
def get_anomalies(path, algorithm, feature=None, window_size=15, mul_dev=3,
                  n_states=10, percent=2, base=512, levels=1):
    """Dispatch anomaly detection over the time series stored at *path*.

    algorithm   -- one of "hmm", "naive", "combined_hmm", "mv", "tmv",
                   "optimal", "cascade", "var_based"
    feature     -- optional pre-transform: "mean", "var", "slope",
                   "deviance" or None (raw values)
    window_size -- window length used by the windowed features
    mul_dev     -- deviation multiplier (used by "naive")
    percent     -- percentage of points to flag (used by the HMM paths)
    base/levels -- forwarded to the "cascade" algorithm

    Raises Exception on an unknown feature or algorithm name.
    TODO: mul_dev drives "naive" while percent drives "hmm" -- use a
    common metric for both.
    """
    times, values = read_lists(path)

    # --- feature extraction ------------------------------------------------
    if feature == "mean":
        flist = features.create_window_features(values, features.f_mean, window_size)
        times = times[window_size:len(times) - window_size]
    elif feature == "var":
        flist = features.create_window_features(values, features.f_var, window_size)
        times = times[window_size:len(times) - window_size]
    elif feature == "slope":
        # TODO: bucketize/smoothen?  update: smoothening doesn't work
        flist = features.f_slope(times, values)
        times = times[:-1]
    elif feature == "deviance":
        flist = features.create_window_features(values, features.f_deviance, window_size)
        times = times[window_size:len(times) - window_size]
    elif feature is None:  # idiom fix: 'is None' instead of '== None'
        flist = values
    else:
        raise Exception("Unknown feature attribute in gateway.py")

    # --- algorithm dispatch ------------------------------------------------
    if algorithm == "hmm":
        bucket_size = 15
        flist = bucketize(times, flist, bucket_size)
        likelihoods = hmm.get_likelihoods(flist, n_states)
        likelihoods = de_bucketize(times, likelihoods, bucket_size)
        return anomalies.min_anomalies(times, likelihoods, float(percent) / 100)
    elif algorithm == "naive":
        return naive.get_anomalies_from_series(times, flist, mul_dev)
    elif algorithm == "combined_hmm":
        bucket_size = 60
        if len(values) < 4000:
            # hardcoded hack!  NOTE(review): bucket_size 0 looks suspicious --
            # confirm bucketize() treats 0 as "no bucketing".
            bucket_size = 0
        times = times[window_size:len(times) - window_size]
        # Per-feature likelihood streams: mean, variance, deviance, slope
        # and the raw values.
        flist = bucketize(times, features.create_window_features(
            values, features.f_mean, window_size), bucket_size)
        mean_likelihoods = hmm.get_likelihoods(flist, n_states)
        flist = bucketize(times, features.create_window_features(
            values, features.f_var, window_size), bucket_size)
        var_likelihoods = hmm.get_likelihoods(flist, n_states)
        flist = bucketize(times, features.create_window_features(
            values, features.f_deviance, window_size), bucket_size)
        dev_likelihoods = hmm.get_likelihoods(flist, n_states)
        flist = features.f_slope(times, values)
        flist = bucketize(times, flist[window_size:len(flist) - window_size],
                          bucket_size)
        slope_likelihoods = hmm.get_likelihoods(flist, n_states)
        values = bucketize(times, values[window_size:len(values) - window_size],
                           bucket_size)
        value_likelihoods = hmm.get_likelihoods(values, n_states)
        # Combine the streams, normalizing each by its standard deviation.
        mean_std = std(array(mean_likelihoods))
        var_std = std(array(var_likelihoods))
        dev_std = std(array(dev_likelihoods))
        slope_std = std(array(slope_likelihoods))
        value_std = std(array(value_likelihoods))
        likelihoods = [mean_likelihoods[i] / mean_std
                       + var_likelihoods[i] / var_std
                       + dev_likelihoods[i] / dev_std
                       + slope_likelihoods[i] / slope_std
                       + value_likelihoods[i] / value_std
                       for i in range(len(values))]
        likelihoods = de_bucketize(times, likelihoods, bucket_size)
        ordered_anomalies, overlaps = anomalies.ordered_min_anomalies(
            times, likelihoods, ratio=0.005)
        return sorted(anomalies.min_cutoff(ordered_anomalies, overlaps))
    elif algorithm == "mv":
        return match.machine_majority_vote(path, float(percent) / 100)
    elif algorithm == "tmv":
        return match.ts_majority_vote(path, float(percent) / 100)
    elif algorithm == "optimal":
        return match.optimize_timeseries(path, mul_dev=3, percent=1.5,
                                         top=None)[0]
    elif algorithm == "cascade":
        return cascade.compute_anomalies1(times, values, base=base, levels=levels)
    elif algorithm == "var_based":
        # Pick between combined_hmm and optimal based on series variance.
        s = avg_std(values)
        print(s)  # print() form: identical output on Py2, valid on Py3
        if s > 0.0010:
            print("combined_hmm")
            return get_anomalies(path, "combined_hmm", feature, window_size,
                                 mul_dev, n_states, percent, base, levels)
        else:
            print("optimal")
            return get_anomalies(path, "optimal", feature, window_size,
                                 mul_dev, n_states, percent, base, levels)
    else:
        raise Exception("Unknown algorithm attribute in gateway.py")
def get_anomalies(path, algorithm, feature=None, window_size=15, mul_dev=3,
                  n_states=10, percent=2, base=512, levels=1):
    """Front-end dispatcher: extract the requested *feature* from the time
    series at *path* and run the chosen anomaly-detection *algorithm* on it.

    mul_dev drives the "naive" detector, percent drives the HMM variants;
    TODO: use a common metric for both.  Raises Exception for an unknown
    feature or algorithm name.
    """
    times, values = read_lists(path)

    # Feature extraction: windowed features share one code path.
    windowed = {"mean": features.f_mean,
                "var": features.f_var,
                "deviance": features.f_deviance}
    if feature in windowed:
        flist = features.create_window_features(values, windowed[feature],
                                                window_size)
        times = times[window_size:len(times) - window_size]
    elif feature == "slope":
        # TODO: bucketize/ smoothen? update: smoothening doesn't work
        flist = features.f_slope(times, values)
        times = times[:-1]
    elif feature is None:
        flist = values
    else:
        raise Exception("Unknown feature attribute in gateway.py")

    if algorithm == "hmm":
        size = 15
        scores = hmm.get_likelihoods(bucketize(times, flist, size), n_states)
        scores = de_bucketize(times, scores, size)
        return anomalies.min_anomalies(times, scores, float(percent) / 100)

    if algorithm == "naive":
        return naive.get_anomalies_from_series(times, flist, mul_dev)

    if algorithm == "combined_hmm":
        size = 60 if len(values) >= 4000 else 0  # hardcoded hack!
        times = times[window_size:len(times) - window_size]

        def windowed_scores(fn):
            # Likelihood stream for one windowed feature of the raw values.
            feats = features.create_window_features(values, fn, window_size)
            return hmm.get_likelihoods(bucketize(times, feats, size), n_states)

        mean_l = windowed_scores(features.f_mean)
        var_l = windowed_scores(features.f_var)
        dev_l = windowed_scores(features.f_deviance)
        slopes = features.f_slope(times, values)
        slope_l = hmm.get_likelihoods(
            bucketize(times, slopes[window_size:len(slopes) - window_size],
                      size), n_states)
        values = bucketize(times, values[window_size:len(values) - window_size],
                           size)
        value_l = hmm.get_likelihoods(values, n_states)

        # Combine all streams, each normalized by its own std deviation.
        scales = [std(array(s))
                  for s in (mean_l, var_l, dev_l, slope_l, value_l)]
        combined = [mean_l[i] / scales[0] + var_l[i] / scales[1]
                    + dev_l[i] / scales[2] + slope_l[i] / scales[3]
                    + value_l[i] / scales[4]
                    for i in range(0, len(values))]
        combined = de_bucketize(times, combined, size)
        ordered, overlaps = anomalies.ordered_min_anomalies(times, combined,
                                                            ratio=0.005)
        return sorted(anomalies.min_cutoff(ordered, overlaps))

    if algorithm == "mv":
        return match.machine_majority_vote(path, float(percent) / 100)

    if algorithm == "tmv":
        return match.ts_majority_vote(path, float(percent) / 100)

    if algorithm == "optimal":
        return match.optimize_timeseries(path, mul_dev=3, percent=1.5,
                                         top=None)[0]

    if algorithm == "cascade":
        return cascade.compute_anomalies1(times, values, base=base,
                                          levels=levels)

    if algorithm == "var_based":
        # Variance-based choice between combined_hmm and optimal.
        spread = avg_std(values)
        print(spread)
        chosen = "combined_hmm" if spread > 0.0010 else "optimal"
        print(chosen)
        return get_anomalies(path, chosen, feature, window_size, mul_dev,
                             n_states, percent, base, levels)

    raise Exception("Unknown algorithm attribute in gateway.py")