def DoExponetialSmooting(series, alpha):
    """Plot the single- and double-smoothed versions of ``series``.

    The series is smoothed once with ``alpha``; that output is then smoothed
    a second time with the same ``alpha``. Both results are stored as the
    columns ``single_smoothing`` and ``double_smoothing`` of a DataFrame,
    which is plotted and shown with matplotlib.

    Args:
        series: Values handed to
            ``SingleSeriesProcessor.exponential_smoothing``.
        alpha: Smoothing factor forwarded unchanged to both passes.
    """
    frame = pd.DataFrame()

    once_smoothed = SingleSeriesProcessor.exponential_smoothing(alpha, series)
    frame["single_smoothing"] = once_smoothed

    # The second pass smooths the already-smoothed output, not the raw input.
    twice_smoothed = SingleSeriesProcessor.exponential_smoothing(
        alpha, once_smoothed
    )
    frame["double_smoothing"] = twice_smoothed

    frame.plot()
    plt.show()
def getChangeCorrelation(s1, s2, window_size, p_threshold):
    """Derive change-event series for two inputs and correlate them.

    Args:
        s1: First value series.
        s2: Second value series.
        window_size: Window size forwarded to ``getChangeEventSeries``.
        p_threshold: p-value threshold forwarded to ``getChangeEventSeries``.

    Returns:
        A tuple ``(events_1, events_2, correlation)`` where the first two
        items are the change-event series of ``s1`` and ``s2`` and the last
        is whatever ``SingleSeriesProcessor.calculate_perason`` returns for
        that pair.
    """
    # Generator unpacking keeps the original evaluation order: s1, then s2.
    events_first, events_second = (
        getChangeEventSeries(values, window_size, p_threshold)
        for values in (s1, s2)
    )
    correlation = SingleSeriesProcessor.calculate_perason(
        events_first, events_second
    )
    return events_first, events_second, correlation
def getChangeEventSeries(s, window_size, p_threshold):
    """Return the change-event series computed for ``s``.

    Thin wrapper around
    ``SingleSeriesProcessor.create_value_change_series``.

    Args:
        s: Value series to analyse.
        window_size: Window size forwarded to the helper.
        p_threshold: p-value threshold forwarded to the helper.

    Returns:
        The helper's result, unchanged.
    """
    # Earlier experiments pre-smoothed ``s`` before detection (get_ewma with
    # window_size, or exponential_smoothing with alpha 0.7); the raw series
    # is used now.
    # NOTE(review): the meaning of the trailing positional ``True`` is defined
    # by create_value_change_series, which is not visible here - confirm.
    return SingleSeriesProcessor.create_value_change_series(
        s, window_size, p_threshold, True
    )
def plotChangePoints():
    """Plot detected change points for two hard-coded metric columns.

    Loads ``data/sock-results-HW_6-20.csv``, replaces missing values with 0,
    passes each of the two columns through
    ``SingleSeriesProcessor.get_partial_series`` with
    ``front_percentage=.75``, prints the correlation of their change-event
    series, and shows one mark-point figure per column.
    """
    window_size = 20
    p_threshold = 0.01
    filename = "sock-results-HW_6-20"

    # Other columns tried previously: "service/front-end/latency" and
    # "service/orders/latency".
    col_name1 = "container/orders/FS Reads Bytes"
    col_name2 = "service/orders/qps(2xx)"

    data = pd.read_csv("data/%s.csv" % filename).fillna(0)

    raw_first = data[col_name1].values
    raw_second = data[col_name2].values

    s1 = SingleSeriesProcessor.get_partial_series(
        raw_first, front_percentage=.75
    )
    s2 = SingleSeriesProcessor.get_partial_series(
        raw_second, front_percentage=.75
    )

    e_s1, e_s2, correlation = getChangeCorrelation(
        s1, s2, window_size, p_threshold
    )
    print(correlation)

    # One figure per series, each titled with its column name.
    for title, values, events in ((col_name1, s1, e_s1), (col_name2, s2, e_s2)):
        plt.title(title)
        DataVisualizer.show_markpoints(values, events)
        plt.show()
def create_value_change_dataset(df, window, p_threshold):
    """Build a dataset of significant changes between adjacent windows.

    Every column of ``df`` is split into windows by
    ``SingleSeriesProcessor.spiltSeriesByWindow``. Each pair of consecutive
    windows is compared with ``scipy.stats.ttest_ind`` using
    ``equal_var=False`` and ``nan_policy='omit'``. When the difference is
    significant (``p_value < p_threshold``) the t statistic is recorded;
    otherwise 0 is recorded.

    The previous implementation recorded the statistic when
    ``p_value > p_threshold``, i.e. exactly when the windows did *not*
    differ significantly, and wrote 0 for real changes.

    Args:
        df: DataFrame whose columns are the series to analyse.
        window: Window size forwarded to ``spiltSeriesByWindow``.
        p_threshold: Significance level; p-values below it count as a change.

    Returns:
        A DataFrame with the same column names as ``df`` and one row per
        pair of adjacent windows.
    """
    dataset = pd.DataFrame()
    for name in df.columns.values:
        windows = SingleSeriesProcessor.spiltSeriesByWindow(
            df[name].values, window
        )
        scores = []
        for first, second in zip(windows, windows[1:]):
            t_stat, p_value = stats.ttest_ind(
                first, second, equal_var=False, nan_policy='omit'
            )
            # A small p-value is evidence that the two window means differ.
            # A NaN p-value (e.g. from constant windows) compares False and
            # is therefore recorded as "no change".
            scores.append(t_stat if p_value < p_threshold else 0)
        dataset[name] = scores
    return dataset
def start_analysis():
    """Run the causality and root-cause analysis on the experiment CSV.

    Steps:
      1. Load ``data/sock-results-HW_6-20-experiment.csv``.
      2. For every group in ``filter_groups``, write the filtered columns to
         their own CSV under ``data/`` and ask ``CausalityAnalysis`` for a
         causality graph (``"fisherz"``, ``"Pc"``) of that file, collecting
         all returned nodes and edges.
      3. Set each edge's ``weight`` to the change-event correlation of its two
         endpoint columns (missing values filled with 0, series passed
         through ``get_partial_series`` with ``.5``).
      4. Clean the edges, build and draw the graph, search it from
         ``"service/orders/success_orders"``, print every route found and
         draw the root-cause graph.
    """
    window_size = 20
    p_threshold = 0.01
    filename = "sock-results-HW_6-20-experiment"

    filter_groups = [["qps", "success_orders"], ["carts/"]]
    # Alternative groupings used in earlier experiments:
    # filter_groups = [["service"]]
    # filter_groups = [["service"], ["front-end"], ["carts"],
    #                  ["container/carts", "192.168.199.31"]]
    # filter_groups = [["service", "192.168.199.35"]]
    # filter_groups = [["service"], ["front-end"], ["carts"], ["orders"],
    #                  ["catalogue"], ["user"], ["payment"], ["shipping"]]
    # filter_groups = [["container/carts", "192.168.199.31"],
    #                  ["container/orders", "192.168.199.32"],
    #                  ["container/user", "192.168.199.32"],
    #                  ["container/catalogue", "192.168.199.33"],
    #                  ["container/shipping", "192.168.199.33"],
    #                  ["container/front-end", "192.168.199.34"],
    #                  ["container/payment", "192.168.199.35"]]
    # filter_groups = [["service"],
    #                  ["front-end/"], ["carts/"], ["orders/"], ["catalogue/"],
    #                  ["user/"], ["payment/"], ["shipping/"],
    #                  ["container/carts"], ["container/orders"],
    #                  ["container/catalogue"], ["container/user"],
    #                  ["container/carts/", "192.168.199.31"],
    #                  ["container/orders/", "192.168.199.32"],
    #                  ["container/user/", "192.168.199.32"],
    #                  ["container/catalogue/", "192.168.199.33"],
    #                  ["container/shipping/", "192.168.199.33"],
    #                  ["container/front-end/", "192.168.199.34"],
    #                  ["container/payment/", "192.168.199.35"]]

    # Read the CSV once; the two stages below only differ in how missing
    # values are filled, and fillna returns a new frame each time.
    raw_data = pd.read_csv("data/%s.csv" % filename)

    # Optional row sub-sampling used in earlier runs:
    # data = DataFrameProcessor.getPartialDataframe(data, end=0.025)
    # data = DataFrameProcessor.getPartialDataframe(data, front=0.0125, end=0.025)
    data = raw_data.fillna("null")

    causality_instance = CausalityAnalysis()
    causality_instance.causalInit()
    node_list = []
    edge_list = []
    try:
        for filter_list in filter_groups:
            sub_data = DataFrameProcessor.filter_columns(data, filter_list)
            # The list repr of the filter becomes part of the file name;
            # "/" is replaced so it cannot be read as a path separator.
            sub_file_name = "%s-%s.csv" % (filename, filter_list)
            sub_file_name = sub_file_name.replace("/", "-")
            sub_file_name = "data/%s" % sub_file_name
            sub_data.to_csv(sub_file_name, index=False)
            nodes, edges = causality_instance.generateCausalityGraph(
                sub_file_name, "fisherz", "Pc"
            )
            node_list.extend(nodes)
            edge_list.extend(edges)
    finally:
        # NOTE(review): causalInit/causalDest look like an acquire/release
        # pair, so causalDest now also runs when graph generation raises -
        # confirm against CausalityAnalysis.
        causality_instance.causalDest()
    print(node_list)

    data = raw_data.fillna(0)
    for edge in edge_list:
        column1 = data[edge.node1_name]
        column2 = data[edge.node2_name]
        s1 = SingleSeriesProcessor.get_partial_series(column1.values, .5)
        s2 = SingleSeriesProcessor.get_partial_series(column2.values, .5)
        # Only the correlation is needed here; the event series are dropped.
        _, _, correlation = getChangeCorrelation(
            s1, s2, window_size, p_threshold
        )
        edge.weight = correlation
        edge.print()

    edge_list = GraphBuilder.clean_edges(edge_list)
    graph = GraphBuilder.build_graph_dict(edge_list)
    GraphVisualizer.drawCausalityGraph(edge_list)

    routes, search_result = GraphSearcher.search_source(
        graph, "service/orders/success_orders"
    )
    for route in routes:
        print(GraphSearcher.path_to_stirng(route, graph, with_weight=False))
    GraphVisualizer.drawRootCauseGraph(search_result, graph)
def PredictByHoltWinters(series, alpha):
    """Predict values for ``series`` and plot them against the original.

    The prediction comes from
    ``SingleSeriesProcessor.predict_value_with_exp_smoothing_3``. It is first
    handed to ``DataVisualizer.show_predicted_data`` together with the input
    series, then drawn once more on the current matplotlib axes under the
    label ``"predict"``.

    Args:
        series: Observed values to predict from.
        alpha: Smoothing factor forwarded to the prediction helper.
    """
    forecast = SingleSeriesProcessor.predict_value_with_exp_smoothing_3(
        alpha, series
    )
    DataVisualizer.show_predicted_data(series, forecast)
    # NOTE(review): this function does not call plt.show() after this final
    # plot - confirm the caller displays the figure.
    plt.plot(forecast, label="predict")