def parse_data(path, verbose=1):
    """Read a sample file and return its inputs and outputs as NumPy arrays.

    The file is expected to be laid out as follows:

    * line 1 -- ``input_size,output_size,samples_size`` (comma-separated
      integers);
    * line 2 -- a free-form parameters line, echoed through
      ``Support.colored_print`` when *verbose* is truthy;
    * remaining lines -- one sample per line, written as
      ``<inputs> = <outputs>`` with whitespace-separated floats on each side.

    Args:
        path: Path of the text file to parse.
        verbose: When truthy, print the parameters line.

    Returns:
        A tuple ``(inputs, outputs, input_size, output_size)`` where *inputs*
        has shape ``(samples_size, input_size)`` and *outputs* has shape
        ``(samples_size, output_size)``.  Rows for which the file provides no
        sample stay zero-filled; sample lines beyond ``samples_size`` are
        ignored.

    Raises:
        ValueError: If the file is empty, a number cannot be parsed, or a
            sample line does not contain exactly one ``=``.
        IndexError: If the header has fewer than three fields or a sample
            holds more values than the header declares.
    """
    with open(path, "r") as input_file:
        lines = iter(input_file)

        header = next(lines, "")
        if not header.strip():
            raise ValueError("data file %r has no header line" % (path,))
        tokens = header.split(",")
        input_size = int(tokens[0])
        output_size = int(tokens[1])
        samples_size = int(tokens[2])
        inputs = numpy.zeros((samples_size, input_size))
        outputs = numpy.zeros((samples_size, output_size))

        parameters = next(lines, None)
        if verbose and parameters is not None:
            Support.colored_print("Parameters: ", "blue")
            Support.colored_print(parameters, "blue")

        row = 0
        for line in lines:
            # Read exactly ``samples_size`` samples.  The previous
            # ``counter < samples_size`` test (with the counter incremented
            # first) stopped one sample early and left the last row at zero.
            if row >= samples_size:
                break
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            raw_inputs, raw_outputs = line.split("=")
            for column, value in enumerate(raw_inputs.split()):
                inputs[row][column] = float(value)
            for column, value in enumerate(raw_outputs.split()):
                outputs[row][column] = float(value)
            row += 1

    return inputs, outputs, input_size, output_size
def evaluate(path_network, input):
    """Load a stored neural network and print its output for *input*.

    Progress messages are printed in blue and the evaluation result in pink
    through ``Support.colored_print``; nothing is returned.

    Args:
        path_network: Location passed to ``NeuralNetwork.load``.
        input: Value handed unchanged to ``NeuralNetwork.evaluate``.
    """
    Support.colored_print("Loading neural network...", "blue")
    network = NeuralNetwork.NeuralNetwork()
    network.load(path_network)

    Support.colored_print("Evaluating...", "blue")
    outcome = network.evaluate(input)
    Support.colored_print(outcome, "pink")
def _estimation_loss(train_elements, theta, y, k):
    """Return ``hypothesis - y`` for the current weights *theta*."""
    hypothesis = numpy.asarray([
        knn.get_error_estimation_weighted_on_input(
            element.input, theta, element.neighbors_i, element.neighbors_o,
            k, False)
        for element in train_elements
    ])
    return hypothesis - y


def _perturb_theta(theta):
    """Randomly rescale 40% of the entries of *theta* in place.

    Each selected weight is multiplied by a factor drawn from
    ``{0.1, 0.2, ..., 0.6}``; ``random.randint`` is inclusive on both ends, so
    the sign is flipped with probability 1/3.
    """
    indexes_to_perturb = numpy.random.choice(
        range(len(theta)), int(float(len(theta)) * 0.4), replace=False)
    for index in indexes_to_perturb:
        perturbation_value = random.randint(1, 6) * 0.1
        perturbed = theta[index] * perturbation_value
        if random.randint(0, 2) == 0:
            perturbed *= -1
        theta[index] = perturbed


def gradient_descent(train_elements, alpha, numIterations, k, verbose=0,
                     jump_enabled=0):
    """Fit the input weights used by the weighted k-NN error estimation.

    Starting from a weight vector of ones, each iteration evaluates
    ``knn.get_error_estimation_weighted_on_input`` on every training element,
    computes the cost ``sum(loss ** 2) / (2 * m)`` and moves the weights along
    ``-alpha * (x^T . loss) / m``.

    When *jump_enabled* is truthy, every time the cost has been found equal to
    the previously recorded one more than 10 times (and is above 10), a random
    perturbation is applied to the weights instead of the gradient step.

    NOTE(review): the nesting of the jump branch was reconstructed from a
    source whose indentation had been lost -- confirm against the original.

    Args:
        train_elements: Sequence of objects exposing ``input``, ``output``,
            ``neighbors_i`` and ``neighbors_o``.
        alpha: Learning rate.
        numIterations: Number of iterations to run.
        k: Number of neighbours forwarded to the k-NN estimation.
        verbose: When truthy, print the cost at every iteration.
        jump_enabled: When truthy, enable the random perturbation described
            above.

    Returns:
        A tuple ``(theta, cost)``.  *cost* is the cost measured at the start
        of the last iteration (before its weight update), or the cost of the
        initial weights when no iteration is run.

    Raises:
        ValueError: If *train_elements* is empty.
    """
    if len(train_elements) == 0:
        raise ValueError("train_elements must contain at least one element")

    x = numpy.asarray([element.input for element in train_elements])
    y = numpy.asarray([element.output for element in train_elements])
    m, n = numpy.shape(x)
    theta = numpy.ones(n)
    x_trans = x.transpose()

    counter_for_jump = 0
    previous_cost = 0
    loss = None
    for i in range(numIterations):
        loss = _estimation_loss(train_elements, theta, y, k)
        cost = numpy.sum(loss ** 2) / (2 * m)
        if verbose:
            Support.colored_print("Iteration %d | Cost: %f" % (i, cost), "red")

        if jump_enabled:
            if previous_cost == cost:
                counter_for_jump += 1
                if counter_for_jump > 10:
                    counter_for_jump = 0
                    if cost > 10:
                        # Stuck on a plateau with a high cost: perturb the
                        # weights and skip the gradient step.  The original
                        # also executed ``i -= 1`` here, which has no effect
                        # inside a ``for ... in range`` loop and was dropped.
                        _perturb_theta(theta)
                        continue
            else:
                previous_cost = cost

        # Average gradient per example, then the weight update.
        gradient = numpy.dot(x_trans, loss) / m
        theta = theta - alpha * gradient

    if loss is None:
        # No iteration ran: report the cost of the initial weights instead of
        # failing on an unbound ``loss``.
        loss = _estimation_loss(train_elements, theta, y, k)
    cost = numpy.sum(loss ** 2) / (2 * m)
    return theta, cost
set_training_i, set_training_o, input_size, _ = Parser.parse_data( path_training_set, 0) set_test_i, set_test_o, _, _ = Parser.parse_data(path_test_set, 0) set_training_big_i = set_training_i[:-20] set_training_big_o = set_training_o[:-20] set_training_little_i = set_training_i[-20:] set_training_little_o = set_training_o[-20:] verbose = True quantity_neighbors = 5 if nearest_found: if verbose: Support.colored_print("Loading neighbors...", "yellow") else: if verbose: Support.colored_print("Searching neighbors...", "yellow") train_elements = [] for i in range(len(set_training_little_i)): current_input = set_training_little_i[i] current_output = set_training_little_o[i] if not nearest_found: # finding neighbors neighbors_i, neighbors_o = knn.find_k_neighbors( current_input, set_training_big_i, set_training_big_o, quantity_neighbors) # saving neighbors path_saving_neighbors = path_saving_base_neighbors + "/neighbors_" + str(
# verifying sum_relative_error_model = 0 for sample_selected in range(0, samples_quantity): expected_output = expected_outputs_wp[sample_selected][position_output] real_output_SVR = model_SVR.predict(input_rf[sample_selected].reshape(1, -1)) #real_output_KRR = model_KRR.predict(input_wp[sample_selected].reshape(1, -1)) real_output_RegressionTree = model_RegressionTree.predict(input_rf[sample_selected].reshape(1, -1)) real_output_RandomForest = model_RandomForest.predict(input_wpaw[sample_selected].reshape(1, -1)) real_output_GBRT = model_GBRT.predict(input_wpwl[sample_selected].reshape(1, -1)) real_output_BaggingRegressor = model_BaggingRegressor.predict(input_wpaw[sample_selected].reshape(1, -1)) real_output_ExtraTreeRegressor = model_ExtraTreeRegressor.predict(input_wp[sample_selected].reshape(1, -1)) real_output_AdaBoostRegressor = model_AdaBoostRegressor.predict(input_rf[sample_selected].reshape(1, -1)) if detailed_verbose != 0: Support.colored_print("-------------------------------------------", "blue") Support.colored_print("expected: " + str(expected_output), "green") Support.colored_print("model SVR: " + str(real_output_SVR), "green") #Support.colored_print("model KRR: " + str(real_output_KRR), "green") Support.colored_print("model RegressionTree: " + str(real_output_RegressionTree), "green") Support.colored_print("model RandomForest: " + str(real_output_RandomForest), "green") Support.colored_print("model GBRT: " + str(real_output_GBRT), "green") Support.colored_print("model BaggingRegressor: " + str(real_output_BaggingRegressor), "green") Support.colored_print("model ExtraTreeRegressor: " + str(real_output_ExtraTreeRegressor), "green") Support.colored_print("model AdaBoostRegressor: " + str(real_output_AdaBoostRegressor), "green") errors = [0, 0, 0, 0, 0, 0, 0] # relative_error_model_KRR = abs((real_output_KRR - expected_output) / real_output_KRR) if real_output_SVR != 0:
input_wpaw[sample_selected].reshape(1, -1)) elif selected_output == Model.EXTRA_TREE_REGRESSOR: real_output = model_ExtraTreeRegressor.predict( input_wp[sample_selected].reshape(1, -1)) elif selected_output == Model.GRADIENT_BOOSTING_REGRESSOR: real_output = model_GBRT.predict(input_wpwl[sample_selected].reshape( 1, -1)) elif selected_output == Model.BAGGING_REGRESSOR: real_output = model_BaggingRegressor.predict( input_wpaw[sample_selected].reshape(1, -1)) elif selected_output == Model.ADABOOST_REGRESSOR: real_output = model_AdaBoostRegressor.predict( input_rf[sample_selected].reshape(1, -1)) if detailed_verbose != 0: Support.colored_print("-------------------------------------------", "blue") Support.colored_print("expected: " + str(expected_output), "green") Support.colored_print("model selected: " + str(selected_output), "green") Support.colored_print("model output: " + str(real_output), "green") if real_output == 0: real_output = 0.0001 relative_error = abs((real_output - expected_output) / (real_output)) sum_relative_error_model += relative_error # showing statistics Support.colored_print("Statistics:", "pink") Support.colored_print("Samples quantity: " + str(samples_quantity), "pink") Support.colored_print( "Percentage quality (relative error) model: " +
# For every sample, compare the model prediction for output 3 and the value
# retrieved as (input column 7 - accumulated production) against the expected
# output 3, accumulating both relative errors.
for sample_selected in range(0, samples_quantity):
    sample = input[sample_selected]
    sample_outputs = expected_outputs[sample_selected]

    production = sample[3] + \
        sample[4] + \
        sample[5] + \
        sample[6] - \
        sample[8]
    for output_selected in range(0, output_quantity):
        if output_selected != 3:
            production += sample_outputs[output_selected]

    expected_output = sample_outputs[3]
    real_output = model.predict(sample.reshape(1, -1))
    retrieved_output = sample[7] - production

    # Support.colored_print("-------------------------------------------", "blue")
    # Support.colored_print("expected: " + str(expected_output), "green")
    # Support.colored_print("model: " + str(real_output), "green")
    # Support.colored_print("retrieved: " + str(retrieved_output), "green")

    # Guard the denominators against zero, using the same 0.0001 substitute
    # as the sibling verification scripts, so one zero value cannot turn the
    # averaged error into inf/nan.
    if real_output == 0:
        real_output = 0.0001
    if retrieved_output == 0:
        retrieved_output = 0.0001

    relative_error_model = abs((real_output - expected_output) / real_output)
    relative_error_retrieved = abs(
        (retrieved_output - expected_output) / retrieved_output)
    sum_relative_error_model += relative_error_model
    sum_relative_error_retrieved += relative_error_retrieved

# showing statistics
Support.colored_print("Statistics:", "pink")
Support.colored_print("Samples quantity: " + str(samples_quantity), "pink")
Support.colored_print(
    "Percentage quality (relative error) model: " +
    str(sum_relative_error_model/samples_quantity), "pink")
Support.colored_print(
    "Percentage quality (relative error) retrieved: " +
    str(sum_relative_error_retrieved/samples_quantity), "pink")

Support.colored_print("Done!", "red")
expected_output_3 = expected_outputs_wpaw[sample_selected][3] expected_output_4 = expected_outputs_wp[sample_selected][4] #expected_output_5 = expected_outputs_rf[sample_selected][5] real_output_production_1 = model_production_1.predict( input_wp[sample_selected].reshape(1, -1)) real_output_production_2 = model_production_2.predict( input_wpaw[sample_selected].reshape(1, -1)) real_output_production_3 = model_production_3.predict( input_wp[sample_selected].reshape(1, -1)) real_output_production_4 = model_production_4.predict( input_wp[sample_selected].reshape(1, -1)) #real_output_production_5 = model_production_5.predict(input_rf[sample_selected].reshape(1, -1)) if detailed_verbose != 0: Support.colored_print("-------------------------------------------", "blue") Support.colored_print( "model output 1: " + str(real_output_production_1) + " expected: " + str(expected_output_1), "green") Support.colored_print( "model output 2: " + str(real_output_production_2) + " expected: " + str(expected_output_2), "green") Support.colored_print( "model output 3: " + str(real_output_production_3) + " expected: " + str(expected_output_3), "green") Support.colored_print( "model output 4: " + str(real_output_production_4) + " expected: " + str(expected_output_4), "green") #Support.colored_print("model output 5: " + str(real_output_production_5) + " expected: " + str(expected_output_5), "green") relative_error_production_1 = Support.calculate_relative_error(
root_directory = "/Users/francesco/Desktop/on_error_2nd/" for dir in os.listdir(root_directory): if not dir[0] == '.': directory_nation = root_directory + dir directory_nation_train = directory_nation + "/train/" directory_nation_test = directory_nation + "/test/" files = os.listdir(directory_nation_train) files.sort() for file in files: if not file[0] == '.': path_training_set_prediction = directory_nation_train + file path_test_set_prediction = directory_nation_test + file.replace( "train", "test") Support.colored_print("______________", "red") Support.colored_print( "Current file: " + path_training_set_prediction, "yellow") Support.colored_print( "Current file: " + path_test_set_prediction, "yellow") training_set_input, training_set_output, _, _ = Parser.parse_data( path_training_set_prediction, 0) test_set_input, test_set_output, _, _ = Parser.parse_data( path_test_set_prediction, 0) best_k = 0 best_k_weighted = 0 avg_accuracy_best_k = float("inf") avg_accuracy_best_k_weighted = float("inf")
normalize=False, positive=False, precompute='auto', random_state=0, selection='cyclic', tol=0.0001, verbose=0) model_name = "ELASTIC_NET_CV" elif selected_model == Model.PLS_REGRESSION: model = PLSRegression(n_components=2) model_name = "PLS_REGRESSION" elif selected_model == Model.LASSO_CV: model = LassoCV() model_name = "LASSO_CV" else: Support.colored_print("No method selected!", "red") sys.exit(0) Support.colored_print("Training " + model_name + "...", "yellow") t0 = time.time() model.fit(X[:train_size], y[:train_size]) model_fit = time.time() - t0 t0 = time.time() y_model = model.predict(X_plot) model_predict = time.time() - t0 sum_relative_error_real = 0 sum_relative_error_plus = 0 sum_relative_error_minus = 0 samples_quantity, _ = input_for_test.shape for sample_selected in range(0, samples_quantity):
"277.000000 0.000000 5.000000 0.000000 1753.000000 398.000000 2855.000000 27313.000000 -5612.000000 83.170000 28.951000 20.790000 212.000000 6799.000000 3494.000000 78.000000 39.000000 6010.000000" .split(' ') ] given_output = 232.0 given_error = 5.989121 path_model = "/Users/francesco/Desktop/Cose da Sistemare/best_predictors/all/fossil_coal.joblib" path_samples = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/training_sets/training_set_fossil_coal_error.txt" else: k = int(sys.argv[1]) given_input = [float(x) for x in sys.argv[2].split(' ')] given_output = float(sys.argv[3]) given_error = float(sys.argv[4]) path_model = sys.argv[5] path_samples = sys.argv[6] model = joblib.load(path_model) given_samples, given_errors, _, _ = Parser.parse_data(path_samples, 0) prediction = model.predict((numpy.asarray(given_input)).reshape(1, -1)) Support.colored_print("model output: " + str(prediction), "blue") Support.colored_print("real output: " + str(given_output), "blue") errors = knn.find_k_neighbors(given_input, given_samples, given_errors, k) error = knn.calculate_error(errors) Support.colored_print("distance based error: " + str(error), "red") Support.colored_print("real error: " + str(given_error), "red") Support.colored_print("Completed!", "pink")
path_training_set_error = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/training_sets/training_set_fossil_oil_error.txt" path_test_set_prediction = "/Users/francesco/Desktop/Cose da Sistemare/datas/ts/test_set_wpwl.txt" path_test_set_error = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/test_sets/test_set_fossil_oil_error.txt" model = joblib.load(path_model) training_set_error_input, training_set_error_output, _, _ = Parser.parse_data(path_training_set_error, 0) test_set_prediction_input, test_set_prediction_output, _, _ = Parser.parse_data(path_test_set_prediction, 0) _, test_set_error_output, _, _ = Parser.parse_data(path_test_set_error, 0) best_k = 0 avg_accuracy_best_k = float("inf") all_avg_values = [] for current_k in range(51, (k_to_try + 1)): Support.colored_print("Current k: " + str(current_k), "blue") sum_errors = 0 for i in range(0, len(test_set_prediction_input)): current_input = test_set_prediction_input[i] prediction = model.predict((numpy.asarray(current_input)).reshape(1, -1)) if verbose: Support.colored_print("Model output: " + str(prediction), "blue") Support.colored_print("Real output: " + str(test_set_prediction_output[i][index_output_prediction]), "blue") error = knn.get_error_estimation(current_input, training_set_error_input, training_set_error_output, current_k, weighted) if verbose: Support.colored_print("Knn based error: " + str(error), "red") Support.colored_print("Real error: " + str(test_set_error_output[i][0]), "red") Support.colored_print("Absolute error knn estimation: " + str(abs(error - test_set_error_output[i][0])), "green") sum_errors += abs(error - test_set_error_output[i][0])
def train(path_training_set, path_target_set, path_output, epochs, batch_size,
          load, output_selected=-1):
    """Train a neural network on a parsed training set and save it.

    Args:
        path_training_set: File parsed with ``Parser.parse_data`` for the
            training samples.
        path_target_set: File parsed with ``Parser.parse_data`` for the
            samples passed to ``NeuralNetwork.train`` as test data.
        path_output: Where the network is saved; also where it is loaded from
            when *load* equals 1.
        epochs: Forwarded to ``NeuralNetwork.train``.
        batch_size: Forwarded to ``NeuralNetwork.train``.
        load: ``1`` to load an existing network from *path_output*; any other
            value creates a new one.
        output_selected: Column of the outputs to train on, or ``-1`` to use
            every output column.
    """
    single_output = output_selected != -1

    # Training data.
    Support.colored_print("Loading training set...", "green")
    train_x, train_y, input_size, output_size = Parser.parse_data(
        path_training_set)
    if single_output:
        train_y = train_y[:, output_selected]
        output_size = 1

    # Test data (the sizes reported for this file are not used).
    Support.colored_print("Loading test set...", "green")
    test_x, test_y, _, _ = Parser.parse_data(path_target_set)
    if single_output:
        test_y = test_y[:, output_selected]
        output_size = 1

    # Network: reuse a stored one or create it from the parsed sizes.
    Support.colored_print("Building neural network...", "green")
    network = NeuralNetwork.NeuralNetwork()
    if load == 1:
        network.load(path_output)
    else:
        network.create(input_size, output_size)

    # Training; the saving path is only forwarded in single-output mode.
    Support.colored_print("Training...", "green")
    train_options = {"epochs": epochs, "batch_size": batch_size, "verbose": 1}
    if single_output:
        train_options["saving_path"] = path_output
    network.train(train_x, train_y, test_x, test_y, **train_options)

    # Persist the trained network.
    Support.colored_print("Saving...", "green")
    network.save(path_output)

    Support.colored_print("Finished!", "green")
#path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_LASSO_CV/model_" #path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_PLS_REGRESSION/model_" #path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_REGRESSION_TREE/model_" #path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_SVR/model_" path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_GPML/model_" path_test_set = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/test_sets/test_set_other_error.txt" input, expected_outputs, input_size, output_size = Parser.parse_data( path_test_set) samples_quantity, _ = input.shape output_quantity = len(expected_outputs[0]) # verifying for output_selected in range(0, output_quantity): model = joblib.load(path_predictors + str(output_selected) + ".joblib") Support.colored_print("Verifying output n: " + str(output_selected), "blue") sum_relative_error = 0 sum_absolute_error = 0 for sample_selected in range(0, samples_quantity): expected_output = expected_outputs[sample_selected][output_selected] real_output = model.predict(input[sample_selected].reshape(1, -1)) real_output *= 100 if real_output == 0: real_output = 0.0001 relative_error = abs((real_output - expected_output) / real_output) absolute_error = abs(real_output - expected_output) sum_relative_error += relative_error sum_absolute_error += absolute_error # showing result if verbose == 1: Support.colored_print("Sample n: " + str(sample_selected), "green")
REGRESSION_TREE = 3 # done RANDOM_FOREST = 4 # in progress EXTRA_TREE_REGRESSOR = 5 # scheduled GRADIENT_BOOSTING_REGRESSOR = 6 # scheduled BAGGING_REGRESSOR = 7 # scheduled ADABOOST_REGRESSOR = 8 # scheduled selected_model = Model.ADABOOST_REGRESSOR path_training_set = "/Users/francesco/Desktop/disp/rf/test_set.txt" base_path_saving = "/Users/francesco/Desktop" output_quantity = 6 for output_selected in range(0, output_quantity): # Loading sample data Support.colored_print("Loading training set...", "green") X, y, input_size, output_size = Parser.parse_data(path_training_set) train_size = X.size y = y[:, output_selected] X_plot = numpy.zeros((1, input_size)) X_plot[0][0] = X.item(0) # Fit regression model Support.colored_print("Initializing model...", "green") if selected_model == Model.SVR: c_param = [0.001, 0.01, 0.1, 1, 10] gamma_param = [0.001, 0.01, 0.1, 1] model = GridSearchCV(SVR(kernel='rbf'), cv=5, param_grid={"C": c_param, "gamma": gamma_param}) model_name = "SVR" elif selected_model == Model.KRR: model = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1), cv=5, param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3], "gamma": numpy.logspace(-2, 2, 5)})