def model2(data):
    """Run S-H-ESD anomaly detection on *data* (period 14, up to 20% anomalies,
    both directions) and return the anomalous index positions as a numpy array."""
    detection = detect_vec(data, max_anoms=0.2, direction='both',
                           period=14, alpha=0.5)
    anomaly_index = detection['anoms'].index
    return np.array(anomaly_index)
def run(period=None, longterm_period=None, link_data=None):
    """Detect anomalies in a CSV time series and tag each row.

    Parameters:
        period: seasonal period passed to detect_vec; when None it defaults
            to 24 and longterm_period is derived as period * 30 + 1.
        longterm_period: long-term window for detect_vec (only derived
            automatically when period is None).
        link_data: path or URL of a CSV; the SECOND column holds the values.

    Returns the input table with an extra 'ano' column: 1 where the row index
    was reported as an anomaly, 0 otherwise.
    """
    if period is None:
        period = 24
        longterm_period = period * 30 + 1
    twitter_example_data = pd.read_csv(link_data)
    data = twitter_example_data[twitter_example_data.columns[1]]
    results = detect_vec(data, max_anoms=0.02,
                         direction='both', period=period,
                         longterm_period=longterm_period)
    res_table = results['anoms']
    # A set makes each membership test O(1); the original list made the
    # tagging loop O(n * m) over n rows and m anomalies.
    index_ano = set(res_table[res_table.columns[1]].tolist())
    res = pd.DataFrame([1 if i in index_ano else 0 for i in range(len(data))],
                       columns=['ano'])
    result_table = pd.concat(
        [twitter_example_data.reset_index(drop=True), res], axis=1)
    return result_table
def test_both_directions_e_value_threshold_med_max(self):
    """Both-direction detection with the med_max threshold and expected
    values must yield 3 columns and 6 anomalies."""
    anoms = detect_vec(self.raw_data.iloc[:, 1], max_anoms=0.02,
                       direction='both', period=1440,
                       threshold="med_max", e_value=True)['anoms']
    eq_(len(anoms.columns), 3)
    eq_(len(anoms.iloc[:, 1]), 6)
def test_both_directions_e_value_longterm(self):
    """Long-term mode (longterm_period = 14 periods) with expected values
    must yield 3 columns and 131 anomalies."""
    anoms = detect_vec(self.raw_data.iloc[:, 1], max_anoms=0.02,
                       direction='both', period=1440,
                       longterm_period=1440 * 14, e_value=True)['anoms']
    eq_(len(anoms.columns), 3)
    eq_(len(anoms.iloc[:, 1]), 131)
def test_both_directions_with_plot(self):
    """only_last with plotting disabled must yield 2 columns and 25 anomalies."""
    anoms = detect_vec(self.raw_data.iloc[:, 1], max_anoms=0.02,
                       direction='both', period=1440,
                       only_last=True, plot=False)['anoms']
    eq_(len(anoms.columns), 2)
    eq_(len(anoms.iloc[:, 1]), 25)
def test_both_directions_e_value_longterm(self):
    """Long-term mode (longterm_period = 14 periods) with expected values
    must yield 3 columns and 131 anomalies."""
    options = dict(max_anoms=0.02, direction="both", period=1440,
                   longterm_period=1440 * 14, e_value=True)
    results = detect_vec(self.raw_data.iloc[:, 1], **options)
    eq_(len(results["anoms"].columns), 3)
    eq_(len(results["anoms"].iloc[:, 1]), 131)
def s_h_esd_algo(total_daily):
    """Run S-H-ESD anomaly detection over flattened hourly occupancy data.

    total_daily: nested sequence indexed as total_daily[day_index][hour_index]
        (the original comment also mentions a garage index; as used here each
        element is one day's list of hourly values -- TODO confirm with caller).

    Returns a list of datetime.date objects on which anomalies were detected.
    Exits the process if the R detection routine rejects the data.
    """
    # One business date per day of data, starting at the configured start date.
    dates = bdate_range(smarking_globals.start_date, periods=len(total_daily))
    data = []    # flattened hourly values
    hours = []   # timestamp matching each entry of `data`
    for day_index, day in enumerate(total_daily):
        # Hourly timestamps covering this day.
        temp_hours = bdate_range(dates[day_index], periods=24, freq='H')
        for hour_index, value in enumerate(day):
            hours.append(temp_hours[hour_index])
            data.append(value)
    df1 = Series(data)
    try:
        results = detect_vec(df1, period=120, max_anoms=0.02, direction='both')
    except RRuntimeError:
        # Something wrong with the data, maybe not periodic.
        # NOTE(review): sys.exit(0) reports success to the shell despite the
        # failure -- kept as-is for backward compatibility.
        print("could not run anomaly detection due to bad data")
        sys.exit(0)
    # Each 'anoms' row carries the integer position of an anomaly in `data`;
    # map positions back to calendar dates via the parallel `hours` list.
    indices = [row['timestamp'] for _, row in results['anoms'].iterrows()]
    return [hours[int(ii)].date() for ii in indices]
def test_both_directions_e_value_threshold_med_max(self):
    """Both-direction detection with the med_max threshold and expected
    values must yield 3 columns and 6 anomalies."""
    options = dict(max_anoms=0.02, direction="both", period=1440,
                   threshold="med_max", e_value=True)
    results = detect_vec(self.raw_data.iloc[:, 1], **options)
    eq_(len(results["anoms"].columns), 3)
    eq_(len(results["anoms"].iloc[:, 1]), 6)
def test_both_directions_with_plot(self):
    """only_last with plotting disabled must yield 2 columns and 25 anomalies."""
    options = dict(max_anoms=0.02, direction="both", period=1440,
                   only_last=True, plot=False)
    results = detect_vec(self.raw_data.iloc[:, 1], **options)
    eq_(len(results["anoms"].columns), 2)
    eq_(len(results["anoms"].iloc[:, 1]), 25)
def test_check_constant_series(self):
    """A constant series must produce zero anomalies."""
    constant_series = pd.Series([1] * 1000)
    results = detect_vec(constant_series, period=14, direction='both',
                         plot=False)
    anoms = results['anoms']
    eq_(len(anoms.columns), 2)
    eq_(len(anoms.iloc[:, 1]), 0)
def check_error_real_time():
    """Poll every garage's current occupancy, compare today's values against
    days_window days of hourly history, and flag Contract/Transient anomalies
    with S-H-ESD (falling back to an IQR test in the early hours).

    Reschedules itself every 600 seconds via threading.Timer.
    Reads module globals: garage_dict, headers, days_window, log_file,
    anomaly_count_con, anomaly_count_tran.
    """
    global time
    global garage_info_occupancy
    global contracts
    global transients
    global anomalies
    # Position of the current garage inside the per-garage counter arrays.
    line_index = 0
    for i in garage_dict:
        contracts_hist = []
        contracts_real_time = []
        transients_hist = []
        transients_real_time = []
        current_contract = 0.0
        current_transient = 0.0
        con = 0    # becomes 1 once any 'Contract' datapoint is seen
        tran = 0   # becomes 1 once any 'Transient' datapoint is seen
        # Current occupancy for this garage, grouped by user type.
        url = "https://my.smarking.net/api/ds/v3/garages/" + str(
            i) + "/current/occupancy?gb=User+Type"
        response = requests.get(url, headers=headers)
        content = response.content
        # Nothing received -> skip this garage.
        if (content == ""):
            continue
        try:
            garage_info = json.loads(content)
        except ValueError:
            # Response was not valid JSON; move on to the next garage.
            continue
        if "value" not in garage_info:
            # No datapoints for this garage.
            continue
        for item in garage_info["value"]:
            group = str(item.get("group"))
            if ('Contract' in group):
                current_contract = float(item.get("value"))
                con = 1
            if ('Transient' in group):
                current_transient = float(item.get("value"))
                tran = 1
        # The detection period is the number of hours elapsed today; the
        # history fetched below covers the same hours over the past
        # days_window days.
        current_t = datetime.now()
        next_time = current_t + timedelta(hours=1)
        # Skipping 11PM (and midnight) for now.
        if ((current_t.hour == 23) or (current_t.hour == 0)):
            continue
        period_length = current_t.hour + 1
        # received_flag stays 1 only if every historical day downloads cleanly.
        received_flag = 1
        for ii in reversed(np.arange(1, days_window + 1)):
            day = next_time - timedelta(days=ii)
            pred_url = "https://my.smarking.net/api/ds/v3/garages/" + str(
                i) + "/past/occupancy/from/" + str(day.year) + "-" + str(
                    day.month) + "-" + str(day.day) + "T00:00:00/" + str(
                        period_length) + "/1h?gb=User+Type"
            response = requests.get(pred_url, headers=headers)
            content = response.content
            # Nothing received -> abandon this garage's history.
            if (content == ""):
                received_flag = 0
                break
            try:
                garage_info = json.loads(content)
            except ValueError:
                received_flag = 0
                break
            if "value" not in garage_info:
                received_flag = 0
                break
            con = 0
            tran = 0
            for item in garage_info["value"]:
                group = str(item.get("group"))
                if ('Contract' in group):
                    for jj in item.get("value"):
                        contracts_hist.append(jj)
                    con = 1
                if ('Transient' in group):
                    for jj in item.get("value"):
                        transients_hist.append(jj)
                    tran = 1
            if ((con == 0) and (tran == 0)):
                # Neither user type present for this day.
                received_flag = 0
                break
        # If we did not receive all data correctly, go to a different garage.
        if (received_flag == 0):
            continue
        # Now fetch today's data up to (but excluding) the current hour; the
        # live reading collected above is appended afterwards.
        pred_url = "https://my.smarking.net/api/ds/v3/garages/" + str(
            i) + "/past/occupancy/from/" + str(current_t.year) + "-" + str(
                current_t.month) + "-" + str(
                    current_t.day) + "T00:00:00/" + str(period_length -
                                                        1) + "/1h?gb=User+Type"
        response = requests.get(pred_url, headers=headers)
        content = response.text
        if (content == ""):
            continue
        try:
            garage_info = json.loads(content)
        except ValueError:
            continue
        if "value" not in garage_info:
            continue
        con = 0
        tran = 0
        for item in garage_info["value"]:
            group = str(item.get("group"))
            if ('Contract' in group):
                for jj in item.get("value"):
                    contracts_real_time.append(jj)
                # Finally append the live reading.
                contracts_real_time.append(current_contract)
                con = 1
            if ('Transient' in group):
                for jj in item.get("value"):
                    transients_real_time.append(jj)
                transients_real_time.append(current_transient)
                tran = 1
        if ((con == 0) and (tran == 0)):
            # No data received for either user type.
            continue
        # Training signal = history followed by today's real-time values.
        training_contract = []
        training_transient = []
        if (con == 1):
            for ii in contracts_hist:
                training_contract.append(ii)
            for ii in contracts_real_time:
                training_contract.append(ii)
            log_file.write(
                str(i) + " running contract anomaly detection " +
                str(datetime.now()) + '\n')
            log_file.flush()
            # Not enough data for signal processing in the early hours; fall
            # back to a simple IQR-based test instead of S-H-ESD.
            indices = []
            if ((current_t.hour == 1) or (current_t.hour == 2)
                    or (current_t.hour == 3) or (current_t.hour == 4)
                    or (current_t.hour == 5)):
                indices = get_iqr_anomaly(training_contract)
            else:
                df1 = Series((v for v in training_contract))
                try:
                    results = detect_vec(df1,
                                         period=period_length,
                                         max_anoms=0.02,
                                         direction='both')
                except RuntimeError:
                    # Something wrong with the data, maybe not periodic.
                    continue
                temp = results['anoms']
                for index, row in temp.iterrows():
                    indices.append(row['timestamp'])
            # Keep only anomalies that fall in today's tail of the signal.
            anomalies = [
                mm for mm in indices
                if mm >= (period_length * (days_window + 1) - 2)
            ]
            if anomalies:
                # Only report once the per-garage counter reaches 5, i.e.
                # after repeated anomalous polls; then reset the counter.
                if (anomaly_count_con[line_index] == 5):
                    print(i, datetime.now(), anomaly_count_con[line_index],
                          " Contract anomaly ", current_contract)
                    anomaly_count_con[line_index] = 0
                else:
                    anomaly_count_con[
                        line_index] = anomaly_count_con[line_index] + 1
        if (tran == 1):
            # Same procedure for transient occupancy.
            for ii in transients_hist:
                training_transient.append(ii)
            for ii in transients_real_time:
                training_transient.append(ii)
            log_file.write(
                str(i) + " running transient anomaly detection " +
                str(datetime.now()) + '\n')
            log_file.flush()
            indices = []
            if ((current_t.hour == 1) or (current_t.hour == 2)
                    or (current_t.hour == 3) or (current_t.hour == 4)
                    or (current_t.hour == 5)):
                indices = get_iqr_anomaly(training_transient)
            else:
                df1 = Series((v for v in training_transient))
                try:
                    results = detect_vec(df1,
                                         period=period_length,
                                         max_anoms=0.02,
                                         direction='both')
                except RuntimeError:
                    # Something wrong with the data, maybe not periodic.
                    continue
                temp = results['anoms']
                for index, row in temp.iterrows():
                    indices.append(row['timestamp'])
            anomalies = [
                mm for mm in indices
                if mm >= (period_length * (days_window + 1) - 2)
            ]
            if anomalies:
                if (anomaly_count_tran[line_index] == 5):
                    print(i, datetime.now(), anomaly_count_tran[line_index],
                          " Transient anomaly ", current_transient)
                    anomaly_count_tran[line_index] = 0
                else:
                    anomaly_count_tran[
                        line_index] = anomaly_count_tran[line_index] + 1
        line_index = line_index + 1
    # Re-run this check every 10 minutes.
    threading.Timer(600, check_error_real_time).start()