def requests_predictions():
    """Fit an ARIMA model to log-transformed request counts and plot predictions.

    Loads regression data points via the project ``reader``, clamps the series
    so the log transform is defined, fits ARIMA(p, d, q) on ``log(values)``,
    and plots dynamic predictions (red) over the log series (blue).
    """
    data_points = reader.create_regression_dp()
    dp = data_points.T[1].astype(float)
    # Clamp values below 1 up to 1 so np.log never sees zero or negatives.
    threshold_for_bug = 1
    dp[dp < threshold_for_bug] = threshold_for_bug
    # Prediction window and ARIMA order.
    start = 100
    end = 5092
    p, d, q = 2, 1, 2
    pprint(np.log(dp))
    arma20 = sm.tsa.ARIMA(np.log(dp), order=(p, d, q)).fit()
    # Fix: use `start` consistently instead of repeating the literal 100, so
    # changing the window start cannot desynchronize predict() and the plot.
    predicted_values = arma20.predict(start=start, end=end, dynamic=True, typ="levels")
    pprint(np.exp(predicted_values))
    # Predictions are in log space; plot them against the log-transformed series.
    plt.plot(data_points.T[0][start:], predicted_values, color="r")
    plt.plot(data_points.T[0], np.log(dp), color="b")
    plt.title("ARIMA Prediction with (" + str(p) + ", " + str(d) + ", " + str(q) + ")")
    plt.show()
    return
def requests_predictions():
    """Plot the raw request series (time on x, count on y) in blue.

    NOTE(review): this is a second definition of ``requests_predictions`` and
    shadows the ARIMA version defined earlier in the file at import time --
    one of the two should be renamed.
    """
    data_points = reader.create_regression_dp()
    # Removed: unused local `dp` and a dead commented-out plotting line.
    plt.plot(data_points.T[0], data_points.T[1], color="b")
    plt.show()
    return
def svr(train_size=2500, C=1.0, epsilon=0.2):
    """Fit a support-vector regressor on the first ``train_size`` points.

    Trains SVR on the leading slice of the series and plots its predictions
    over the entire range via the project ``plotter``.

    :param train_size: number of leading samples used for fitting.
    :param C: SVR regularization strength (passed through to sklearn).
    :param epsilon: SVR epsilon-tube width (passed through to sklearn).
    """
    dp = reader.create_regression_dp()
    X = dp.T[0]
    # sklearn expects a 2-D feature matrix: (n_samples, 1).
    X = np.reshape(X, (len(X), 1))
    Y = dp.T[1]
    X_train = X[:train_size]
    Y_train = Y[:train_size]
    # Removed: unused `X_test` slice (prediction runs over all of X below).
    clf = SVR(C=C, epsilon=epsilon)
    clf.fit(X_train, Y_train)
    Y_result = clf.predict(X)
    plotter.plot_regression(X, Y, Y_result)
    return
def polynomial(alpha=10000, degree=2, train_size=2500):
    """Ridge regression over polynomial features of the time axis.

    Expands the 1-D time feature to degree-``degree`` polynomial features,
    fits a Ridge model on the first ``train_size`` samples, and plots its
    predictions over the whole series via the project ``plotter``.

    :param alpha: Ridge regularization strength.
    :param degree: polynomial expansion degree.
    :param train_size: number of leading samples used for fitting.
    """
    dp = reader.create_regression_dp()
    X = dp.T[0]
    # sklearn expects a 2-D feature matrix: (n_samples, 1).
    X = np.reshape(X, (len(X), 1))
    Y = dp.T[1]
    X = PolynomialFeatures(degree).fit_transform(X)
    X_train = X[:train_size]
    Y_train = Y[:train_size]
    # Removed: unused `X_test` / `Y_test` slices (prediction runs over all of X).
    model = Ridge(alpha=alpha).fit(X_train, Y_train)
    Y_result = model.predict(X)
    plotter.plot_regression(X, Y, Y_result)
    return
def ridge(alpha=10, train_size=2500):
    """Plain Ridge regression of request count against the time axis.

    Fits on the first ``train_size`` samples and plots predictions over the
    whole series via the project ``plotter``.

    :param alpha: Ridge regularization strength.
    :param train_size: number of leading samples used for fitting.
    """
    dp = reader.create_regression_dp()
    X = dp.T[0]
    # sklearn expects a 2-D feature matrix: (n_samples, 1).
    X = np.reshape(X, (len(X), 1))
    Y = dp.T[1]
    # X is already (n, 1), so slicing preserves the shape -- the original
    # re-reshapes of X_train/X_test were redundant and X_test/Y_test unused.
    X_train = X[:train_size]
    # Keep the 2-D target shape the original used, so model.predict still
    # returns a column vector (shape matters to downstream plotting).
    Y_train = np.reshape(Y[:train_size], (train_size, 1))
    model = Ridge(alpha=alpha).fit(X_train, Y_train)
    Y_result = model.predict(X)
    plotter.plot_regression(X, Y, Y_result)
    return
def run(update_freq=0, steps_advance=0, starting_step=0, prediction_type="one", prediction_latency=20):
    """Simulate the predictive autoscaling loop over the recorded cluster trace.

    Walks the trace step by step, periodically reclassifying load thresholds,
    making a prediction every ``prediction_latency`` steps, and scaling the VM
    count in/out based on the resulting decision. Results are handed to
    ``compare`` at the end.

    NOTE(review): ``update_freq`` is accepted but never read in this body.
    NOTE(review): depends on module-level ``reclassify_latency`` and
    ``vm_start_latency`` plus helpers ``classify``, ``predictor_*``,
    ``clustering.make_decision_*`` and ``compare`` -- none visible here.
    NOTE(review): the source arrived with its indentation flattened; the
    nesting below (scale decisions inside the prediction gate) is reconstructed
    from statement order -- confirm against version control.
    """
    cluster_data = reader.create_data_points_no_requests()
    current_data = cluster_data[:starting_step]
    regression_data = reader.create_regression_dp()
    current_step = starting_step
    current_vm_number = cluster_data[starting_step][1]
    last_step = len(cluster_data) - 1
    next_vm_start_time = -1  # step at which a pending VM becomes active
    vm_start_in_progress = False
    reclassify_countdown = reclassify_latency
    # Initial classification
    thresholds = classify(current_data)
    # save the data generated by the algorithm
    output_data = list()
    next_prediction = current_step
    while (current_step <= last_step):
        # Run what happens at a single time step
        # compute current mean cpu
        # Rescale recorded mean CPU by the recorded-vs-simulated VM ratio,
        # capped at 100%.
        current_mean_cpu_usage = (cluster_data[current_step][0] * cluster_data[current_step][1]) / current_vm_number
        if current_mean_cpu_usage > 100:
            current_mean_cpu_usage = 100
        # Check if new VM started
        if vm_start_in_progress:
            if next_vm_start_time == current_step:
                current_vm_number += 1
                vm_start_in_progress = False
        # If needed reclassify data
        if reclassify_countdown > 0:
            reclassify_countdown -= 1
        else:
            reclassify_countdown = reclassify_latency
            thresholds = classify(current_data)
        if next_prediction == current_step:
            next_prediction = current_step + prediction_latency
            # Predict
            if last_step > current_step + steps_advance:
                # Make a prediction
                if prediction_type == "one":
                    prediction = predictor_one_value(regression_data, current_step, steps_advance) / current_vm_number
                    # Inject small random noise into the single-value prediction.
                    prediction += np.random.randint(low=-5, high=5)
                    decision = clustering.make_decision_onevalue(thresholds, current_vm_number, prediction)
                elif prediction_type == "many":
                    predictions = predictor_many(regression_data, current_step)
                    decision = clustering.make_decision_many(thresholds, current_vm_number, predictions)
                elif prediction_type == "mean":
                    predictions = predictor_mean(regression_data, current_step, steps_advance)
                    decision = clustering.make_decision_mean(thresholds, current_vm_number, predictions)
                # NOTE(review): if prediction_type matches none of the branches
                # above, `decision` is unbound here and the next lines raise
                # NameError -- no default decision is assigned.
                # SCALE IN
                if decision == 0 and current_vm_number > 1:
                    current_vm_number -= 1
                # SCALE OUT
                if decision == 2 and vm_start_in_progress == False:
                    vm_start_in_progress = True
                    next_vm_start_time = current_step + vm_start_latency
        # Update data go to next step
        current_data = np.append(current_data, [[current_mean_cpu_usage, current_vm_number]], axis=0)
        output_data.append([current_mean_cpu_usage, current_vm_number])
        current_step += 1
    compare(cluster_data, output_data, starting_step)