Ejemplo n.º 1
0
def detect_SMA(path, window, threshold):
    """Find anomalies from each point's distance to a simple moving average.

    Reads the series at ``path``, smooths its values with ``moving_avg`` and,
    for every index in ``range(window + 1, len(series) - window)``, records
    the absolute difference between the smoothed value and the raw value.
    The distances, paired with their timestamps, are handed to
    ``naive.get_anomalies_from_series`` with a fixed multiplier of 3.

    Args:
        path: Location passed to ``read_timeseries``.
        window: Window passed to ``moving_avg``; it also determines how many
            leading and trailing points are skipped.
        threshold: Currently unused (the peak-to-peak filter that consumed it
            is disabled); kept so existing callers keep working.

    Returns:
        The result of ``naive.get_anomalies_from_series`` for the distance
        series.
    """
    series = read_timeseries(path)
    # Materialize the values before handing them to NumPy: on Python 3 ``map``
    # is lazy and ``np.array(map(...))`` would wrap the iterator in a 0-d
    # object array instead of building an array of the values.
    raw_values = np.array([point[1] for point in series])
    s_ma = moving_avg(raw_values, window)
    # NOTE(review): assumes s_ma is index-aligned with series -- confirm
    # against moving_avg.
    times = []
    distances = []
    for i in range(window + 1, len(series) - window):
        dist = abs(float(s_ma[i] - series[i][1]))
        times.append(series[i][0])
        distances.append(dist)
    return naive.get_anomalies_from_series(times, distances, 3)
Ejemplo n.º 2
0
def detect_SMA(path, window, threshold):
    """Detect anomalies via deviation from a simple moving average (SMA).

    The series read from ``path`` is smoothed with ``moving_avg``. For each
    index in ``range(window + 1, len(series) - window)`` the absolute gap
    between the smoothed and the raw value is computed, and the resulting
    (timestamps, gaps) series is passed to
    ``naive.get_anomalies_from_series`` with a fixed multiplier of 3.

    Args:
        path: Location passed to ``read_timeseries``.
        window: Window passed to ``moving_avg``; also controls how many
            points at both ends of the series are skipped.
        threshold: Currently unused (the filter that relied on it is
            disabled); retained for interface compatibility.

    Returns:
        The result of ``naive.get_anomalies_from_series`` for the gap series.
    """
    series = read_timeseries(path)
    # A list comprehension instead of ``map``: on Python 3 ``map`` returns an
    # iterator, which ``np.array`` would store as a single opaque object.
    raw_values = np.array([point[1] for point in series])
    s_ma = moving_avg(raw_values, window)
    # NOTE(review): assumes s_ma[i] corresponds to series[i] -- confirm
    # against moving_avg.
    indices = range(window + 1, len(series) - window)
    distances = [abs(float(s_ma[i] - series[i][1])) for i in indices]
    times = [series[i][0] for i in indices]
    return naive.get_anomalies_from_series(times, distances, 3)
Ejemplo n.º 3
0
def get_anomalies(path,
                  algorithm,
                  feature=None,
                  window_size=15,
                  mul_dev=3,
                  n_states=10,
                  percent=2,
                  base=512,
                  levels=1):
    """Run the selected anomaly-detection algorithm on the series at ``path``.

    The series is loaded with ``read_lists``. An optional feature transform
    is applied first, then the work is dispatched on ``algorithm``.

    Args:
        path: Location passed to ``read_lists`` (and to the ``match`` helpers).
        algorithm: One of ``"hmm"``, ``"naive"``, ``"combined_hmm"``,
            ``"mv"``, ``"tmv"``, ``"optimal"``, ``"cascade"``, ``"var_based"``.
        feature: ``None`` (use raw values), ``"mean"``, ``"var"``,
            ``"deviance"`` (windowed features) or ``"slope"``.
        window_size: Window for the windowed features; the same number of
            timestamps is trimmed from both ends to match.
        mul_dev: Multiplier forwarded to the ``"naive"`` detector.
        n_states: State count forwarded to ``hmm.get_likelihoods``.
        percent: Percentage (divided by 100 before use) for ``"hmm"``,
            ``"mv"`` and ``"tmv"``.
        base: Forwarded to ``cascade.compute_anomalies1``.
        levels: Forwarded to ``cascade.compute_anomalies1``.

    Returns:
        Whatever the selected detector returns.

    Raises:
        Exception: If ``feature`` or ``algorithm`` is not recognised. The
            feature is validated first, even for algorithms that ignore it.
    """
    # mul_dev to be used for naive, percent for hmm. TODO: Use common metric for both.
    times, values = read_lists(path)

    if feature in ("mean", "var", "deviance"):
        # The three windowed features differ only in the reducer
        # (features.f_mean / f_var / f_deviance) applied to each window.
        flist = features.create_window_features(
            values, getattr(features, "f_" + feature), window_size)
        times = times[window_size:len(times) - window_size]
    elif feature == "slope":
        # TODO: bucketize/ smoothen? update: smoothening doesn't work
        flist = features.f_slope(times, values)
        times = times[:-1]
    elif feature is None:
        flist = values
    else:
        raise Exception("Unknown feature attribute in gateway.py")

    if algorithm == "hmm":
        bucket_size = 15
        flist = bucketize(times, flist, bucket_size)
        likelihoods = hmm.get_likelihoods(flist, n_states)
        likelihoods = de_bucketize(times, likelihoods, bucket_size)
        return anomalies.min_anomalies(times, likelihoods,
                                       float(percent) / 100)

    elif algorithm == "naive":
        return naive.get_anomalies_from_series(times, flist, mul_dev)

    elif algorithm == "combined_hmm":
        bucket_size = 60
        if len(values) < 4000:  # hardcoded hack!
            bucket_size = 0
        # NOTE(review): if a windowed ``feature`` was requested, ``times`` has
        # already been trimmed above and is trimmed a second time here --
        # confirm this is intended.
        times = times[window_size:len(times) - window_size]

        def bucketed_likelihoods(series):
            # Bucketize one feature series and score it with the HMM.
            return hmm.get_likelihoods(
                bucketize(times, series, bucket_size), n_states)

        mean_likelihoods = bucketed_likelihoods(
            features.create_window_features(values, features.f_mean,
                                            window_size))
        var_likelihoods = bucketed_likelihoods(
            features.create_window_features(values, features.f_var,
                                            window_size))
        dev_likelihoods = bucketed_likelihoods(
            features.create_window_features(values, features.f_deviance,
                                            window_size))
        # NOTE(review): f_slope receives the trimmed ``times`` together with
        # the untrimmed ``values`` -- verify that f_slope tolerates this.
        slopes = features.f_slope(times, values)
        slope_likelihoods = bucketed_likelihoods(
            slopes[window_size:len(slopes) - window_size])
        # Raw values, trimmed to the same span as the windowed features.
        bucketed_values = bucketize(
            times, values[window_size:len(values) - window_size],
            bucket_size)
        value_likelihoods = hmm.get_likelihoods(bucketed_values, n_states)

        # Divide each likelihood series by its own standard deviation before
        # summing the five series element-wise.
        mean_std = std(array(mean_likelihoods))
        var_std = std(array(var_likelihoods))
        dev_std = std(array(dev_likelihoods))
        slope_std = std(array(slope_likelihoods))
        value_std = std(array(value_likelihoods))
        likelihoods = [
            mean_likelihoods[i] / mean_std + var_likelihoods[i] / var_std +
            dev_likelihoods[i] / dev_std + slope_likelihoods[i] / slope_std +
            value_likelihoods[i] / value_std
            for i in range(len(bucketed_values))
        ]
        likelihoods = de_bucketize(times, likelihoods, bucket_size)
        ordered_anomalies, overlaps = anomalies.ordered_min_anomalies(
            times, likelihoods, ratio=0.005)
        return sorted(anomalies.min_cutoff(ordered_anomalies, overlaps))

    elif algorithm == "mv":
        return match.machine_majority_vote(path, float(percent) / 100)

    elif algorithm == "tmv":
        return match.ts_majority_vote(path, float(percent) / 100)

    elif algorithm == "optimal":
        # Uses fixed settings; the caller's mul_dev / percent are not consulted.
        return match.optimize_timeseries(path,
                                         mul_dev=3,
                                         percent=1.5,
                                         top=None)[0]

    elif algorithm == "cascade":
        return cascade.compute_anomalies1(times,
                                          values,
                                          base=base,
                                          levels=levels)

    elif algorithm == "var_based":
        # Choose the detector from avg_std of the series, then re-enter with
        # the concrete algorithm name.
        s = avg_std(values)
        # Single-argument print(...) produces the same output on Python 2 and
        # is valid syntax on Python 3.
        print(s)
        chosen = "combined_hmm" if s > 0.0010 else "optimal"
        print(chosen)
        return get_anomalies(path, chosen, feature, window_size, mul_dev,
                             n_states, percent, base, levels)

    else:
        raise Exception("Unknown algorithm attribute in gateway.py")
Ejemplo n.º 4
0
def get_anomalies(path, algorithm, feature=None, window_size=15, mul_dev=3,
                  n_states=10, percent=2, base=512, levels=1):
    """Detect anomalies in the series at ``path`` with the chosen algorithm.

    Loads ``(times, values)`` via ``read_lists``, optionally replaces the
    values with a derived feature series, and dispatches on ``algorithm``.

    Args:
        path: Location passed to ``read_lists`` (and to the ``match`` helpers).
        algorithm: One of ``"hmm"``, ``"naive"``, ``"combined_hmm"``,
            ``"mv"``, ``"tmv"``, ``"optimal"``, ``"cascade"``, ``"var_based"``.
        feature: ``None`` for the raw values, or one of ``"mean"``,
            ``"var"``, ``"deviance"`` (windowed) or ``"slope"``.
        window_size: Window used by the windowed features; timestamps are
            trimmed by this amount at both ends to match.
        mul_dev: Multiplier forwarded to the ``"naive"`` detector.
        n_states: State count forwarded to ``hmm.get_likelihoods``.
        percent: Percentage (divided by 100 before use) for ``"hmm"``,
            ``"mv"`` and ``"tmv"``.
        base: Forwarded to ``cascade.compute_anomalies1``.
        levels: Forwarded to ``cascade.compute_anomalies1``.

    Returns:
        Whatever the selected detector returns.

    Raises:
        Exception: For an unrecognised ``feature`` (checked first) or
            ``algorithm``.
    """
    # mul_dev to be used for naive, percent for hmm. TODO: Use common metric for both.
    times, values = read_lists(path)

    if feature in ("mean", "var", "deviance"):
        # Same windowing for all three; only the per-window reducer
        # (features.f_mean / f_var / f_deviance) changes.
        reducer = getattr(features, "f_" + feature)
        flist = features.create_window_features(values, reducer, window_size)
        times = times[window_size:len(times) - window_size]
    elif feature == "slope":
        # TODO: bucketize/ smoothen? update: smoothening doesn't work
        flist = features.f_slope(times, values)
        times = times[:-1]
    elif feature is None:
        flist = values
    else:
        raise Exception("Unknown feature attribute in gateway.py")

    if algorithm == "hmm":
        bucket_size = 15
        flist = bucketize(times, flist, bucket_size)
        likelihoods = hmm.get_likelihoods(flist, n_states)
        likelihoods = de_bucketize(times, likelihoods, bucket_size)
        return anomalies.min_anomalies(times, likelihoods, float(percent) / 100)

    elif algorithm == "naive":
        return naive.get_anomalies_from_series(times, flist, mul_dev)

    elif algorithm == "combined_hmm":
        bucket_size = 60
        if len(values) < 4000:  # hardcoded hack!
            bucket_size = 0
        # NOTE(review): when a windowed ``feature`` was requested, ``times``
        # was already trimmed above and gets trimmed again here -- confirm.
        times = times[window_size:len(times) - window_size]

        def score(series):
            # Bucketize a feature series, then compute its HMM likelihoods.
            return hmm.get_likelihoods(bucketize(times, series, bucket_size), n_states)

        mean_likelihoods = score(
            features.create_window_features(values, features.f_mean, window_size))
        var_likelihoods = score(
            features.create_window_features(values, features.f_var, window_size))
        dev_likelihoods = score(
            features.create_window_features(values, features.f_deviance, window_size))
        # NOTE(review): f_slope is given the trimmed ``times`` but the full
        # ``values`` -- verify that f_slope tolerates this.
        slopes = features.f_slope(times, values)
        slope_likelihoods = score(slopes[window_size:len(slopes) - window_size])
        # Raw values, cut to the same span as the windowed features.
        bucketed_values = bucketize(
            times, values[window_size:len(values) - window_size], bucket_size)
        value_likelihoods = hmm.get_likelihoods(bucketed_values, n_states)

        # Each likelihood series is divided by its own standard deviation
        # before the five series are added element-wise.
        mean_std = std(array(mean_likelihoods))
        var_std = std(array(var_likelihoods))
        dev_std = std(array(dev_likelihoods))
        slope_std = std(array(slope_likelihoods))
        value_std = std(array(value_likelihoods))
        likelihoods = [
            mean_likelihoods[i] / mean_std
            + var_likelihoods[i] / var_std
            + dev_likelihoods[i] / dev_std
            + slope_likelihoods[i] / slope_std
            + value_likelihoods[i] / value_std
            for i in range(len(bucketed_values))
        ]
        likelihoods = de_bucketize(times, likelihoods, bucket_size)
        ordered_anomalies, overlaps = anomalies.ordered_min_anomalies(
            times, likelihoods, ratio=0.005)
        return sorted(anomalies.min_cutoff(ordered_anomalies, overlaps))

    elif algorithm == "mv":
        return match.machine_majority_vote(path, float(percent) / 100)

    elif algorithm == "tmv":
        return match.ts_majority_vote(path, float(percent) / 100)

    elif algorithm == "optimal":
        # Fixed settings: the caller's mul_dev / percent are not consulted.
        return match.optimize_timeseries(path, mul_dev=3, percent=1.5, top=None)[0]

    elif algorithm == "cascade":
        return cascade.compute_anomalies1(times, values, base=base, levels=levels)

    elif algorithm == "var_based":
        # Pick the detector from avg_std of the series and re-enter with the
        # concrete algorithm name.
        s = avg_std(values)
        # Single-argument print(...) outputs the same text on Python 2 and is
        # valid syntax on Python 3.
        print(s)
        chosen = "combined_hmm" if s > 0.0010 else "optimal"
        print(chosen)
        return get_anomalies(path, chosen, feature, window_size, mul_dev,
                             n_states, percent, base, levels)

    else:
        raise Exception("Unknown algorithm attribute in gateway.py")