Esempio n. 1
0
def is_outlier():
    """Flask handler: tell whether a newly posted symptom reading is an outlier.

    The request body must be JSON containing the keys ``symptom``, ``value``,
    ``ts`` and ``uid``. The posted reading is appended to the readings stored
    for the same user during the last hour, and the combined series is scored
    with a Local Outlier Factor model.

    Returns:
        A ``(response, status)`` tuple:

        * ``400`` with a ``message`` when ``symptom`` is not in ``symptoms``;
        * ``200`` with ``isOutlier``/``noData`` when no reading was stored
          for the user in the last hour;
        * ``200`` with ``outlier`` (stringified flag of the last row of the
          combined series) otherwise.

    Aborts with HTTP 400 when the body is missing or empty, when a required
    key is absent, or when ``uid`` cannot be converted to an integer.
    """
    payload = flask.request.json
    required_keys = ("symptom", "value", "ts", "uid")
    if not payload or any(key not in payload for key in required_keys):
        flask.abort(400)

    symptom = payload["symptom"]
    val = payload["value"]
    ts = payload["ts"]
    uid = payload["uid"]

    # The symptom name is interpolated into the SQL text as an identifier, so
    # it must be checked against the allow-list before the query is built.
    if symptom not in symptoms:
        return flask.jsonify(
            {'message':
             f'Invalid Symptom! Accepted symptoms are {symptoms}'}), 400

    # A non-numeric uid is a client error, not a server failure.
    try:
        user_id = int(uid)
    except (TypeError, ValueError):
        flask.abort(400)

    # NOTE(review): naive local time; assumes the DB stores timestamps in the
    # same timezone as this server -- confirm.
    hour_before = datetime.datetime.now() - datetime.timedelta(hours=1)

    # The column identifier is quoted in both places so that the WHERE clause
    # resolves to exactly the same column as the SELECT list.
    select_query_1_hour = (
        f"select timestamp,\"{symptom}\" from symptom "
        f"where \"{symptom}\" is not null and \"userId\" = '{user_id}' "
        f"and timestamp > '{hour_before}' order by timestamp desc"
    )
    column_names = ["timestamp", symptom]

    conn = connect()
    try:
        df = postgresql_to_pd(conn, select_query_1_hour, column_names)
        if len(df) == 0:
            return flask.jsonify({
                'isOutlier': str(False),
                'noData': str(True)
            }), 200

        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent way to add the posted reading as the last row.
        new_reading = pd.DataFrame([{f"{symptom}": val, "timestamp": ts}])
        df = pd.concat([df, new_reading], ignore_index=True)

        df["DateTime"] = pd.to_datetime(df["timestamp"])
        df = df.set_index(pd.DatetimeIndex(df["DateTime"]))
        df = df.drop(["timestamp", "DateTime"], axis=1)
        # NOTE(review): the return value is discarded, as in the original, so
        # row order is left untouched and the posted reading stays last.
        validate_series(df)

        # df has at least two rows here, so n_neighbors is at least 1.
        outlier_detector = OutlierDetector(
            LocalOutlierFactor(contamination=0.05,
                               n_neighbors=int(round(len(df) / 2))))
        anomalies = outlier_detector.fit_detect(df)
        return flask.jsonify({'outlier': str(anomalies.iat[-1])}), 200
    finally:
        # Release the connection on every path, including the early
        # "no data" return and any exception raised above.
        conn.close()
 def isolationForestAD(self, c):
     """Detect anomalies in ``self.data`` with an Isolation Forest.

     ``c`` is passed to ``IsolationForest`` as its ``contamination``
     argument. The detection result is stored in ``self.anomalies`` and
     ``self.evaluate()`` is then called.
     """
     detector = OutlierDetector(IsolationForest(contamination=c))
     self.anomalies = detector.fit_detect(self.data)
     self.evaluate()
 def localOutlierFactorAD(self, c):
     """Detect anomalies in ``self.data`` with a Local Outlier Factor model.

     ``c`` is passed to ``LocalOutlierFactor`` as its ``contamination``
     argument. The detection result is stored in ``self.anomalies`` and
     ``self.evaluate()`` is then called.
     """
     detector = OutlierDetector(LocalOutlierFactor(contamination=c))
     self.anomalies = detector.fit_detect(self.data)
     self.evaluate()
Esempio n. 4
0
# Visual settings shared by the anomaly plots in this section.
anomaly_plot_style = dict(
    ts_linewidth=2,
    ts_markersize=3,
    ap_color='red',
    ap_alpha=0.3,
    curve_group='all',
)

# Plot the series together with the anomalies predicted so far.
plot(df, anomaly_pred=anomalies, **anomaly_plot_style)

# GET ACCURACY
# Count the entries of Y that were flagged as anomalous and equal 1, then
# divide by the number of entries in eventtime_incon[0].
# NOTE(review): presumably Y holds ground-truth labels and eventtime_incon
# the true event times -- confirm against where they are defined.
flagged_labels = Y[anomalies.values]
hit_positions = np.where(flagged_labels == 1)[0]
found_events = len(hit_positions)
accuracy = found_events / len(eventtime_incon[0])
accuracy
# !!!! PLOT ROC CURVE !!!!
# THIS IS SUPER COOL B=)
from adtk.detector import OutlierDetector
from sklearn.neighbors import LocalOutlierFactor

# Local Outlier Factor wrapped in ADTK's OutlierDetector (contamination 0.05).
lof_model = LocalOutlierFactor(contamination=0.05)
outlier_detector = OutlierDetector(lof_model)
anomalies = outlier_detector.fit_detect(df)
plot(df, anomaly_pred=anomalies, **anomaly_plot_style)

from adtk.detector import RegressionAD
from sklearn.linear_model import LinearRegression

# Regression-based detector: linear regression with column "data2" as the
# target and c=3.0.
regression_ad = RegressionAD(
    regressor=LinearRegression(),
    target="data2",
    c=3.0,
)
anomalies = regression_ad.fit_detect(df)
Esempio n. 5
0
    # Index the bins by their timestamp and run ADTK's series validation.
    TimeBins = TimeBins.set_index('timestamp')
    TimeBins = validate_series(TimeBins)

    # Detector in use: Local Outlier Factor with a single neighbour, p=1 and
    # contamination 0.05. (PersistAD, ThresholdAD and a customized detector
    # were tried here previously and have been removed.)
    outlier_detector = OutlierDetector(
        LocalOutlierFactor(n_neighbors=1, p=1, contamination=0.05))
    anomalies = outlier_detector.fit_detect(TimeBins)

    plot(TimeBins,
         anomaly=anomalies,
         ts_linewidth=1,
         ts_markersize=5,
         anomaly_color='red',
         anomaly_alpha=0.3,
         curve_group='all')
    # Upper y-limit is hard-coded to 460.
    plt.ylim(top=460)
    # Both values must be supplied to the format string as one tuple;
    # chaining `% Input2 % elem` applies the operator twice and raises
    # TypeError on the first application when Input2 is a single number.
    plt.savefig('%d_%d.pdf' % (Input2, elem), bbox_inches='tight')
    plt.close()
    # Drop the references to the per-iteration data before continuing.
    del TimeBins
    del rslt_df
    del boolean_condition
Esempio n. 6
0
def adtk_init(model, colname=None):
    """Return the detector registered under the short name ``model``.

    Imports are done lazily, so only the modules required by the selected
    detector are loaded.

    Args:
        model: Short detector name (for example ``'iqr'``, ``'kmeans'``,
            ``'isof'`` or ``'linear'``).
        colname: Passed as ``target`` to ``RegressionAD`` for the
            regression-based detectors; ignored by all others.

    Returns:
        The constructed detector, or ``ADTKDefault()`` when ``model`` does
        not match any known name.
    """
    # Window size handed to the shift/persist detectors.
    window = 15

    # --- Stand-alone ADTK detectors ---
    if model == 'iqr':
        from adtk.detector import InterQuartileRangeAD
        return InterQuartileRangeAD()
    if model == 'ar':
        from adtk.detector import AutoregressionAD
        return AutoregressionAD()
    if model == 'esd':
        from adtk.detector import GeneralizedESDTestAD
        return GeneralizedESDTestAD()
    if model == 'level':
        from adtk.detector import LevelShiftAD
        return LevelShiftAD(window)
    if model == 'persist':
        from adtk.detector import PersistAD
        return PersistAD(window)
    if model == 'quantile':
        from adtk.detector import QuantileAD
        return QuantileAD()
    if model == 'seasonal':
        from adtk.detector import SeasonalAD
        return SeasonalAD()
    if model == 'volatility':
        from adtk.detector import VolatilityShiftAD
        return VolatilityShiftAD(window)
    if model == 'pcaad':
        from adtk.detector import PcaAD
        return PcaAD()

    # --- Clustering models wrapped in MinClusterDetector ---
    if model in ('kmeans', 'birch', 'gmm', 'vbgmm'):
        from adtk.detector import MinClusterDetector
        if model == 'kmeans':
            from sklearn.cluster import KMeans
            clusterer = KMeans(n_clusters=2)
        elif model == 'birch':
            from sklearn.cluster import Birch
            clusterer = Birch(threshold=0.25, branching_factor=25)
        elif model == 'gmm':
            from sklearn.mixture import GaussianMixture
            clusterer = GaussianMixture(n_components=2, max_iter=50)
        else:
            from sklearn.mixture import BayesianGaussianMixture
            clusterer = BayesianGaussianMixture(n_components=2, max_iter=50)
        return MinClusterDetector(clusterer)

    # --- Outlier models wrapped in OutlierDetector ---
    if model in ('eliptic', 'mcdad', 'isof', 'lofad'):
        from adtk.detector import OutlierDetector
        if model == 'eliptic':
            from sklearn.covariance import EllipticEnvelope
            scorer = EllipticEnvelope()
        elif model == 'mcdad':
            # NOTE(review): MinCovDet is a covariance estimator; confirm it
            # exposes the prediction API OutlierDetector relies on.
            from sklearn.covariance import MinCovDet
            scorer = MinCovDet()
        elif model == 'isof':
            from sklearn.ensemble import IsolationForest
            scorer = IsolationForest()
        else:
            from sklearn.neighbors import LocalOutlierFactor
            scorer = LocalOutlierFactor()
        return OutlierDetector(scorer)

    # --- Regressors wrapped in RegressionAD, targeting ``colname`` ---
    if model in ('linear', 'rf', 'huber', 'knnad', 'kernridge'):
        from adtk.detector import RegressionAD
        if model == 'linear':
            from sklearn.linear_model import LinearRegression
            regressor = LinearRegression()
        elif model == 'rf':
            from sklearn.ensemble import RandomForestRegressor
            regressor = RandomForestRegressor()
        elif model == 'huber':
            from sklearn.linear_model import HuberRegressor
            regressor = HuberRegressor()
        elif model == 'knnad':
            from sklearn.neighbors import KNeighborsRegressor
            regressor = KNeighborsRegressor()
        else:
            from sklearn.kernel_ridge import KernelRidge
            regressor = KernelRidge()
        return RegressionAD(regressor, target=colname)

    # Unknown name: fall back to the project's default detector.
    return ADTKDefault()