def test(plot=True):
    '''
    Tests validity of error/anomaly identification algorithm

    Reads "Error CSV Files/3.12.2015_error.csv", detrends the second column
    (RCMPL) with a degree-6 polynomial via p_poly.poly_detrend, runs
    error_detect on the detrended stream and optionally plots the result.

    Parameters:
        plot (bool) - True if you want to see a plot of the data, the fitted
                      polynomial, the detrended stream, and error datapoints
                      in 2 plots
    Returns:
        None
    '''
    # read the file; the with-block closes it even if a row fails to parse
    data = []
    with open("Error CSV Files/3.12.2015_error.csv") as f:
        for row in csv.reader(f):
            # first column is a timestamp string, the remaining ones are numeric
            dt = convert_str_to_dt(row[0])
            data.append([dt] + [float(value) for value in row[1:]])

    # create different datasets
    date_data = [row[0] for row in data]
    rcmpl_data = [row[1] for row in data]
    x_vals = list(range(len(rcmpl_data)))

    # fit polynomial to RCMPL data
    y_vals, y_poly = p_poly.poly_detrend(x_vals, rcmpl_data, degree=6, d_type="d")

    # detect "anomalies"
    # each entry is indexed as (position in the stream, detrended value, ...)
    error_vals = error_detect(y_vals, 0, 20)
    error_x = [err[0] for err in error_vals]
    error_y = [err[1] for err in error_vals]
    # raw (not detrended) values at the flagged positions
    init_error_y = [rcmpl_data[pos] for pos in error_x]

    # plot results
    if plot:
        plt.figure(1)
        # upper plot: raw data, fitted polynomial and flagged raw points
        plt.subplot(211)
        plt.title("{}".format(date_data[0].date()))
        # The flagged points are paired with error_x so they land on their
        # real x positions; passing init_error_y alone made matplotlib draw
        # them against 0..k-1 instead.
        plt.plot(x_vals, rcmpl_data, "bo",
                 x_vals, y_poly, "r-",
                 error_x, init_error_y, "ro")
        # lower plot: detrended stream and flagged detrended points
        plt.subplot(212)
        plt.plot(x_vals, y_vals, "b", error_x, error_y, "ro")
        plt.show()
def _write_rows_and_load(csv_name, rows, load_sql):
    '''
    Writes rows to a CSV file and bulk-loads that file into the database

    Parameters:
        csv_name (str) - path of the CSV file to (over)write
        rows (list) - rows handed to csv.writer.writerows
        load_sql (str) - LOAD DATA statement that reads csv_name
    Returns:
        None
    '''
    # write out contents; the with-block closes the file even if writing fails
    with open(csv_name, 'w') as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerows(rows)

    # upload data to db
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration outside version control.
    db = MySQLdb.connect("11.120.36.241", "internship", "Pr3dict!v@", "predictiveAnalytics")
    try:
        c = db.cursor()
        c.execute(load_sql)
        db.commit()
    finally:
        # release the connection even when the load statement fails
        db.close()


def main(load_error_to_db = False,load_error_to_dataset = False):
    '''
    Flags anomalies in "prototype1 (4).csv" day by day and optionally uploads them

    Each CSV row is read as a timestamp followed by numeric columns. Rows are
    grouped per day, the second column of each day is detrended with a
    degree-6 polynomial (p_poly.poly_detrend) and passed to
    poly_test.error_detect. Every row is then extended in place with three
    columns: the score of its detected error (None when not flagged), a raw
    0/1 error flag, and the 0/1 flag returned by checkErrors.

    Parameters:
        load_error_to_db (bool) - True to write the flagged timestamps and
                                  scores to "error_ts_score.csv" and load the
                                  file into table order_anomalies
        load_error_to_dataset (bool) - True to write the full annotated
                                       dataset to "order_dataset.csv" and load
                                       the file into table order_dataset
    Returns:
        None
    '''
    # read in data; the with-block closes the file even if a row fails to parse
    data = []
    with open("prototype1 (4).csv") as f:
        for row in csv.reader(f):
            dt = poly_test.convert_str_to_dt(row[0])
            data.append([dt] + [float(value) for value in row[1:]])

    error_data = []
    index = 0
    total = len(data)
    while index < total:
        # Day boundary as in the original: only hour and minute are zeroed,
        # so seconds of the day's first row carry over into the boundary.
        day_start = data[index][0].replace(hour=0, minute=0)
        next_dt = day_start + datetime.timedelta(days=1)

        # group data by day
        # The bounds check is on index itself, so the final row of the file
        # joins its day; the previous "len(data)-1" guard left it out.
        daily_data = []
        while index < total and data[index][0].replace(hour=0, minute=0) < next_dt:
            daily_data.append([index, data[index][0], data[index][1]])
            index += 1

        # create relevant lists
        x_vals = list(range(len(daily_data)))
        y_vals = [entry[2] for entry in daily_data]

        # create/fit polynomial and detrend data
        y_new, y_poly = p_poly.poly_detrend(x_vals, y_vals, degree=6, d_type="d")

        # detect errors
        error_vals = poly_test.error_detect(y_new, 0, 20)

        # build error_data list: swap the within-day position for the timestamp
        for err in error_vals:
            err = list(err)
            err[0] = daily_data[err[0]][1]
            error_data.append(err)

    # add relevant information to original dataset loaded in
    for row in data:
        score = None
        flag = 0
        for j, err in enumerate(error_data):
            if row[0] == err[0]:
                # element 2 of a detected error is stored as the row's score
                # (presumably a std-based score - confirm against error_detect)
                score = err[2]
                flag = 1
                # consume the match so later rows do not rescan it
                error_data.pop(j)
                break
        row.append(score)
        row.append(flag)

    # create binary (1 for error at timestamp 0 ow) error list
    error_binary = [row[-1] for row in data]

    # fill in holes for errors
    error_binary_new = checkErrors(error_binary)
    for i, filled_flag in enumerate(error_binary_new):
        data[i].append(filled_flag)

    # create error list to write out/upload to db
    error_list = []
    # Starts as None so a back-filled row that precedes every scored error
    # gets an empty score instead of raising on an unassigned variable.
    latest_std = None
    for row in data:
        if row[-1] == 1:
            if row[4] is not None:
                latest_std = row[4]
            else:
                # rows flagged only by checkErrors inherit the latest score
                row[4] = latest_std
            error_list.append([row[0], latest_std])

    if load_error_to_db:
        _write_rows_and_load(
            "error_ts_score.csv",
            error_list,
            "LOAD DATA LOCAL INFILE 'error_ts_score.csv' INTO TABLE order_anomalies FIELDS TERMINATED BY ','",
        )

    # Uses the load_error_to_dataset parameter; the previous check read
    # "upload_master_dataset", which is not defined in this function.
    if load_error_to_dataset:
        # create updated data list: original columns, filled flag, then score
        data = [[i[0], i[1], i[2], i[3], i[6], i[4]] for i in data]
        _write_rows_and_load(
            "order_dataset.csv",
            data,
            "LOAD DATA LOCAL INFILE 'order_dataset.csv' INTO TABLE order_dataset FIELDS TERMINATED BY ','",
        )