def is_outlier():
    """Tell whether a newly reported symptom value is an outlier.

    Reads the JSON body of the current Flask request, which must be an
    object containing the keys ``symptom``, ``value``, ``ts`` and ``uid``.
    The user's readings for that symptom from the last hour are loaded
    from the ``symptom`` table, the reported value is appended to them, and
    a Local Outlier Factor detector is run over the combined series.

    Returns:
        A ``(response, status)`` tuple:

        * ``{'message': ...}, 400`` when ``symptom`` is not one of the
          accepted ``symptoms``.
        * ``{'isOutlier': 'False', 'noData': 'True'}, 200`` when there is
          no reading in the last hour to compare against.
        * ``{'outlier': '<flag>'}, 200`` otherwise, where the flag is the
          detector's verdict for the last row (the appended reading).

    Aborts with HTTP 400 when the body is missing, is not a JSON object,
    lacks a required key, or carries a ``uid`` that is not an integer.
    """
    payload = flask.request.json
    required_fields = ('symptom', 'value', 'ts', 'uid')
    # An empty body, a non-object body (list/string/number) or a missing
    # key are all client errors.
    if not isinstance(payload, dict) or any(
            field not in payload for field in required_fields):
        flask.abort(400)

    symptom = payload["symptom"]
    val = payload["value"]
    ts = payload["ts"]

    hour_before = datetime.datetime.now() - datetime.timedelta(hours=1)
    print(hour_before)

    if symptom not in symptoms:
        return flask.jsonify(
            {'message': f'Invalid Symptom! Accepted symptoms are {symptoms}'}), 400

    # Reject a malformed uid as a client error instead of letting int()
    # raise and surface as an HTTP 500.
    try:
        uid = int(payload["uid"])
    except (TypeError, ValueError):
        flask.abort(400)

    # Every interpolated value is constrained before it reaches the SQL
    # text: ``symptom`` passed the whitelist check above, ``uid`` is an int
    # and ``hour_before`` is generated server-side. The column name is
    # double-quoted in the WHERE clause exactly as in the select list so
    # both references resolve to the same column.
    select_query_1_hour = (
        f"select timestamp,\"{symptom}\" from symptom "
        f"where \"{symptom}\" is not null and \"userId\" = '{uid}' "
        f"and timestamp > '{hour_before}' order by timestamp desc"
    )
    column_names = ["timestamp", symptom]

    # Always release the connection, including on the early "no data"
    # return below and when the query itself raises.
    conn = connect()
    try:
        df = postgresql_to_pd(conn, select_query_1_hour, column_names)
    finally:
        conn.close()
    print(df)

    if len(df) == 0:
        # NOTE(review): this response uses the key 'isOutlier' while the
        # normal path uses 'outlier'; left unchanged for API compatibility.
        return flask.jsonify({
            'isOutlier': str(False),
            'noData': str(True)
        }), 200

    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # way to add the new reading as the last row.
    new_reading = pd.DataFrame([{symptom: val, "timestamp": ts}])
    df = pd.concat([df, new_reading], ignore_index=True)

    df["DateTime"] = pd.to_datetime(df["timestamp"])
    df = df.set_index(pd.DatetimeIndex(df["DateTime"]))
    df = df.drop(["timestamp", "DateTime"], axis=1)

    # The return value is deliberately not used: keeping the original row
    # order guarantees that the appended reading stays in the last
    # position, which is the one inspected below.
    validate_series(df)

    outlier_detector = OutlierDetector(
        LocalOutlierFactor(contamination=0.05,
                           n_neighbors=int(round(len(df) / 2))))
    anomalies = outlier_detector.fit_detect(df)

    return flask.jsonify({'outlier': str(anomalies.iat[-1])}), 200
def isolationForestAD(self, c):
    """Run Isolation Forest outlier detection on ``self.data``.

    Args:
        c: Value passed to ``IsolationForest`` as ``contamination``.

    The detection result is stored on ``self.anomalies`` and
    ``self.evaluate()`` is called afterwards.
    """
    forest = IsolationForest(contamination=c)
    self.anomalies = OutlierDetector(forest).fit_detect(self.data)
    self.evaluate()
def localOutlierFactorAD(self, c):
    """Run Local Outlier Factor detection on ``self.data``.

    Args:
        c: Value passed to ``LocalOutlierFactor`` as ``contamination``.

    The detection result is stored on ``self.anomalies`` and
    ``self.evaluate()`` is called afterwards.
    """
    lof = LocalOutlierFactor(contamination=c)
    self.anomalies = OutlierDetector(lof).fit_detect(self.data)
    self.evaluate()
# Plot df together with the anomalies computed earlier in the script
# (`anomalies` is defined before this fragment).
plot(df, anomaly_pred=anomalies, ts_linewidth=2, ts_markersize=3, ap_color='red', ap_alpha=0.3, curve_group='all')
'''GET ACCURACY'''
# Count the entries of Y, selected by the anomaly flags, that equal 1.
# NOTE(review): presumably Y holds ground-truth event labels aligned with
# df and anomalies.values is a boolean mask -- confirm against the caller.
found_events = len(np.where(Y[anomalies.values] == 1)[0])
# Ratio of those hits to the number of entries in eventtime_incon[0].
# NOTE(review): raises ZeroDivisionError if eventtime_incon[0] is empty.
accuracy = found_events / len(eventtime_incon[0])
# Bare expression: only has a visible effect as the last expression of an
# interactive/notebook cell.
accuracy
# !!!! PLOT ROC CURVE !!!!
'''THIS IS SUPER COOL B=) '''
from adtk.detector import OutlierDetector
from sklearn.neighbors import LocalOutlierFactor
# Re-detect anomalies on df with Local Outlier Factor (contamination 0.05);
# this overwrites the previous `anomalies`.
outlier_detector = OutlierDetector(LocalOutlierFactor(contamination=0.05))
anomalies = outlier_detector.fit_detect(df)
plot(df, anomaly_pred=anomalies, ts_linewidth=2, ts_markersize=3, ap_color='red', ap_alpha=0.3, curve_group='all')
from adtk.detector import RegressionAD
from sklearn.linear_model import LinearRegression
# Regression-based detection with a linear model targeting the "data2"
# column; `anomalies` is overwritten again and not plotted in this fragment.
regression_ad = RegressionAD(regressor=LinearRegression(), target="data2", c=3.0)
anomalies = regression_ad.fit_detect(df)
# Detect anomalies in TimeBins with Local Outlier Factor, plot them, save
# the figure as a PDF and release the intermediate objects.

#TimeBins=TimeBins[TimeBins["timestamp"] < '2020-06-30 23:55:55']
# Index by timestamp and let ADTK validate the series before detection.
TimeBins = TimeBins.set_index('timestamp')
TimeBins = validate_series(TimeBins)

# Alternative detectors tried previously (kept for reference):
#persist_ad = PersistAD(window=7, c=3, side='both')
#anomalies1 = persist_ad.fit_detect(TimeBins)
#plot(TimeBins, anomaly=anomalies1, ts_linewidth=1, ts_markersize=3, anomaly_color='red', figsize=(20,10), anomaly_tag="marker", anomaly_markersize=5)
#customized_detector = CustomizedDetectorHD(detect_func=Detector_prive)
#anomalies = customized_detector.detect(TimeBins)
#threshold_ad = ThresholdAD(high=150, low=0)
#anomalies = threshold_ad.detect(TimeBins)
#plot(TimeBins, anomaly=anomalies, ts_linewidth=1, ts_markersize=5, anomaly_color='red', anomaly_alpha=0.3, curve_group='all');

outlier_detector = OutlierDetector(
    LocalOutlierFactor(n_neighbors=1, p=1, contamination=0.05))
anomalies = outlier_detector.fit_detect(TimeBins)

plot(TimeBins, anomaly=anomalies, ts_linewidth=1, ts_markersize=5,
     anomaly_color='red', anomaly_alpha=0.3, curve_group='all')
plt.ylim(top=460)

# Both placeholders must be filled by a single tuple. The previous form
# ``'%d_%d.pdf' % Input2 % elem`` applied ``%`` twice, left to right, so
# the first application failed for a scalar Input2 (not enough arguments).
# NOTE(review): assumes Input2 and elem are integers -- confirm.
plt.savefig('%d_%d.pdf' % (Input2, elem), bbox_inches='tight')
plt.close()

# Drop the large intermediates before the next piece of work.
del TimeBins
del rslt_df
del boolean_condition
# Each builder below imports what it needs lazily, so only the libraries
# required by the selected model are loaded. All builders share the
# signature ``builder(colname)``; only the regression builders use it.


def _build_iqr(colname):
    from adtk.detector import InterQuartileRangeAD
    return InterQuartileRangeAD()


def _build_ar(colname):
    from adtk.detector import AutoregressionAD
    return AutoregressionAD()


def _build_esd(colname):
    from adtk.detector import GeneralizedESDTestAD
    return GeneralizedESDTestAD()


def _build_level(colname):
    from adtk.detector import LevelShiftAD
    return LevelShiftAD(15)


def _build_persist(colname):
    from adtk.detector import PersistAD
    return PersistAD(15)


def _build_quantile(colname):
    from adtk.detector import QuantileAD
    return QuantileAD()


def _build_seasonal(colname):
    from adtk.detector import SeasonalAD
    return SeasonalAD()


def _build_volatility(colname):
    from adtk.detector import VolatilityShiftAD
    return VolatilityShiftAD(15)


def _build_kmeans(colname):
    from adtk.detector import MinClusterDetector
    from sklearn.cluster import KMeans
    return MinClusterDetector(KMeans(n_clusters=2))


def _build_birch(colname):
    from adtk.detector import MinClusterDetector
    from sklearn.cluster import Birch
    return MinClusterDetector(Birch(threshold=0.25, branching_factor=25))


def _build_gmm(colname):
    from adtk.detector import MinClusterDetector
    from sklearn.mixture import GaussianMixture
    return MinClusterDetector(GaussianMixture(n_components=2, max_iter=50))


def _build_vbgmm(colname):
    from adtk.detector import MinClusterDetector
    from sklearn.mixture import BayesianGaussianMixture
    return MinClusterDetector(
        BayesianGaussianMixture(n_components=2, max_iter=50))


def _build_eliptic(colname):
    from adtk.detector import OutlierDetector
    from sklearn.covariance import EllipticEnvelope
    return OutlierDetector(EllipticEnvelope())


def _build_mcdad(colname):
    from adtk.detector import OutlierDetector
    from sklearn.covariance import MinCovDet
    return OutlierDetector(MinCovDet())


def _build_isof(colname):
    from adtk.detector import OutlierDetector
    from sklearn.ensemble import IsolationForest
    return OutlierDetector(IsolationForest())


def _build_lofad(colname):
    from adtk.detector import OutlierDetector
    from sklearn.neighbors import LocalOutlierFactor
    return OutlierDetector(LocalOutlierFactor())


def _build_pcaad(colname):
    from adtk.detector import PcaAD
    return PcaAD()


def _build_linear(colname):
    from adtk.detector import RegressionAD
    from sklearn.linear_model import LinearRegression
    return RegressionAD(LinearRegression(), target=colname)


def _build_rf(colname):
    from adtk.detector import RegressionAD
    from sklearn.ensemble import RandomForestRegressor
    return RegressionAD(RandomForestRegressor(), target=colname)


def _build_huber(colname):
    from adtk.detector import RegressionAD
    from sklearn.linear_model import HuberRegressor
    return RegressionAD(HuberRegressor(), target=colname)


def _build_knnad(colname):
    from adtk.detector import RegressionAD
    from sklearn.neighbors import KNeighborsRegressor
    return RegressionAD(KNeighborsRegressor(), target=colname)


def _build_kernridge(colname):
    from adtk.detector import RegressionAD
    from sklearn.kernel_ridge import KernelRidge
    return RegressionAD(KernelRidge(), target=colname)


# Ordered (model name, builder) pairs. A tuple of pairs is used instead of
# a dict so that lookup keeps plain ``==`` comparisons in a fixed order.
_ADTK_BUILDERS = (
    ('iqr', _build_iqr),
    ('ar', _build_ar),
    ('esd', _build_esd),
    ('level', _build_level),
    ('persist', _build_persist),
    ('quantile', _build_quantile),
    ('seasonal', _build_seasonal),
    ('volatility', _build_volatility),
    ('kmeans', _build_kmeans),
    ('birch', _build_birch),
    ('gmm', _build_gmm),
    ('vbgmm', _build_vbgmm),
    ('eliptic', _build_eliptic),
    ('mcdad', _build_mcdad),
    ('isof', _build_isof),
    ('lofad', _build_lofad),
    ('pcaad', _build_pcaad),
    ('linear', _build_linear),
    ('rf', _build_rf),
    ('huber', _build_huber),
    ('knnad', _build_knnad),
    ('kernridge', _build_kernridge),
)


def adtk_init(model, colname=None):
    """Create the ADTK detector registered under the short name ``model``.

    Args:
        model: Short model name, e.g. ``'iqr'``, ``'kmeans'``, ``'isof'``
            or ``'linear'``.
        colname: Passed as ``target`` to ``RegressionAD`` for the
            regression-based models (``'linear'``, ``'rf'``, ``'huber'``,
            ``'knnad'``, ``'kernridge'``); unused by the other models.

    Returns:
        The freshly constructed detector, or ``ADTKDefault()`` when
        ``model`` matches none of the known names.
    """
    for known_name, build in _ADTK_BUILDERS:
        if model == known_name:
            return build(colname)
    return ADTKDefault()