예제 #1
0
def parse_data(path, verbose=1):
    """Load a sample file into input and output matrices.

    Layout consumed by this parser:

    * line 1: ``input_size,output_size,samples_size`` (comma-separated ints);
    * line 2: a free-form parameters line, echoed in blue when ``verbose``
      is truthy and otherwise ignored;
    * remaining lines: one sample each, written as whitespace-separated
      input values, an ``=`` sign, then whitespace-separated output values.

    At most ``samples_size`` samples are read; blank lines between or after
    samples are skipped and any further lines are ignored.

    Args:
        path: Path of the text file to read.
        verbose: When truthy, print the parameters line through
            ``Support.colored_print``.

    Returns:
        A tuple ``(inputs, outputs, input_size, output_size)``. ``inputs`` is
        a ``(samples_size, input_size)`` array and ``outputs`` a
        ``(samples_size, output_size)`` array; rows for which the file
        provides no sample stay zero.

    Raises:
        ValueError: If the file is empty, a header or sample value is not a
            number, or a sample line does not contain exactly one ``=``.
        IndexError: If the header has fewer than three fields or a sample
            holds more values than the header declared.
    """
    with open(path, "r") as input_file:
        header = input_file.readline()
        if not header.strip():
            raise ValueError("missing header line in %r" % (path,))
        tokens = header.split(",")
        input_size = int(tokens[0])
        output_size = int(tokens[1])
        samples_size = int(tokens[2])
        inputs = numpy.zeros((samples_size, input_size))
        outputs = numpy.zeros((samples_size, output_size))

        # readline() returns "" at end of file, in which case there is no
        # parameters line to echo.
        parameters = input_file.readline()
        if verbose and parameters:
            Support.colored_print("Parameters: ", "blue")
            Support.colored_print(parameters, "blue")

        row = 0
        for line in input_file:
            # Stop once every declared row is filled. The previous
            # "counter < samples_size" test stopped one sample early and
            # left the last row zero.
            if row >= samples_size:
                break
            if not line.strip():
                continue
            raw_inputs, raw_outputs = line.split("=")
            for column, value in enumerate(raw_inputs.split()):
                inputs[row][column] = float(value)
            for column, value in enumerate(raw_outputs.split()):
                outputs[row][column] = float(value)
            row += 1
    return inputs, outputs, input_size, output_size
예제 #2
0
def evaluate(path_network, input):
    """Load a saved network, run it on ``input`` and print the result.

    Args:
        path_network: Location handed to the network's ``load`` method.
        input: Value forwarded unchanged to the network's ``evaluate``.

    Nothing is returned: progress messages are printed in blue and the
    evaluation result in pink.
    """
    announce = Support.colored_print

    announce("Loading neural network...", "blue")
    network = NeuralNetwork.NeuralNetwork()
    network.load(path_network)

    announce("Evaluating...", "blue")
    announce(network.evaluate(input), "pink")
예제 #3
0
def gradient_descent(train_elements,
                     alpha,
                     numIterations,
                     k,
                     verbose=0,
                     jump_enabled=0):
    """Fit the weight vector ``theta`` by gradient descent.

    Every iteration evaluates
    ``knn.get_error_estimation_weighted_on_input`` for each training element
    with the current ``theta``, compares the results with the elements'
    ``output`` values and moves ``theta`` against the averaged gradient.

    Args:
        train_elements: Sequence of objects exposing ``input``, ``output``,
            ``neighbors_i`` and ``neighbors_o`` attributes.
        alpha: Step size multiplying the gradient in each update.
        numIterations: Number of loop iterations to run.
        k: Forwarded to ``knn.get_error_estimation_weighted_on_input``.
        verbose: When truthy, print the iteration index and cost.
        jump_enabled: When truthy, randomly perturb part of ``theta`` once
            the cost has repeated often enough while still above 10.

    Returns:
        A tuple ``(theta, cost)``; ``cost`` is recomputed from the loss of
        the last executed iteration.

    Raises:
        UnboundLocalError: If ``numIterations`` is 0, because ``loss`` is
            never assigned before the final cost computation.
    """
    # Gather the per-element inputs and expected outputs into arrays.
    inputs = []
    outputs = []
    for e in range(0, len(train_elements)):
        inputs.append(train_elements[e].input)
        outputs.append(train_elements[e].output)

    x = numpy.asarray(inputs)
    y = numpy.asarray(outputs)
    m, n = numpy.shape(x)
    # One weight per input column, all starting at 1.
    theta = numpy.ones(n)
    x_trans = x.transpose()
    # Counts iterations whose cost equals the stored previous cost. It is
    # reset only when it exceeds 10, not when the cost changes, so the
    # repeats it counts need not be consecutive.
    counter_for_jump = 0
    previous_cost = 0
    for i in range(0, numIterations):
        # Estimate every training element with the current weights.
        results = []
        for j in range(len(train_elements)):
            results.append(
                knn.get_error_estimation_weighted_on_input(
                    train_elements[j].input, theta,
                    train_elements[j].neighbors_i,
                    train_elements[j].neighbors_o, k, False))

        hypothesis = numpy.asarray(results)
        loss = hypothesis - y
        # Half of the mean squared loss over the m samples.
        cost = numpy.sum(loss**2) / (2 * m)
        if verbose:
            Support.colored_print("Iteration %d | Cost: %f" % (i, cost), "red")

        if jump_enabled:
            if previous_cost == cost:
                counter_for_jump += 1
                if counter_for_jump > 10:
                    counter_for_jump = 0
                    if cost > 10:
                        # Jump: pick 40% of the weight indexes (rounded
                        # down), without repetition, and perturb them.
                        indexes_to_perturbate = numpy.random.choice(
                            range(len(theta)),
                            int(float(len(theta)) * 0.4),
                            replace=False)
                        for j in range(len(indexes_to_perturbate)):
                            # Scale the weight by a random factor.
                            # Assumes `random` is the standard-library
                            # module, whose randint includes both bounds
                            # (factor 0.1 to 0.6) - TODO confirm.
                            perturbation_value = random.randint(1, 6) * 0.1
                            perturbated = theta[
                                indexes_to_perturbate[j]] * perturbation_value
                            # Flip the sign when the draw is 0 (one chance
                            # in three under the same assumption).
                            if random.randint(0, 2) == 0:
                                perturbated *= -1
                            theta[indexes_to_perturbate[j]] = perturbated
                        # NOTE(review): this decrement has no effect,
                        # because the for statement rebinds `i` from the
                        # range on the next pass; a jump therefore consumes
                        # one iteration. Confirm whether a retry was meant.
                        i -= 1
                        # Skip the gradient step for this iteration.
                        continue
            else:
                previous_cost = cost

        # Gradient averaged over the m examples.
        gradient = numpy.dot(x_trans, loss) / m
        # Move the weights against the gradient.
        theta = theta - alpha * gradient

    # Recomputed from the last iteration's loss; theta may have been updated
    # or perturbed since that loss was measured.
    cost = numpy.sum(loss**2) / (2 * m)
    return theta, cost
예제 #4
0
    set_training_i, set_training_o, input_size, _ = Parser.parse_data(
        path_training_set, 0)
    set_test_i, set_test_o, _, _ = Parser.parse_data(path_test_set, 0)

    set_training_big_i = set_training_i[:-20]
    set_training_big_o = set_training_o[:-20]

    set_training_little_i = set_training_i[-20:]
    set_training_little_o = set_training_o[-20:]

    verbose = True
    quantity_neighbors = 5

    if nearest_found:
        if verbose:
            Support.colored_print("Loading neighbors...", "yellow")
    else:
        if verbose:
            Support.colored_print("Searching neighbors...", "yellow")

    train_elements = []
    for i in range(len(set_training_little_i)):
        current_input = set_training_little_i[i]
        current_output = set_training_little_o[i]
        if not nearest_found:
            # finding neighbors
            neighbors_i, neighbors_o = knn.find_k_neighbors(
                current_input, set_training_big_i, set_training_big_o,
                quantity_neighbors)
            # saving neighbors
            path_saving_neighbors = path_saving_base_neighbors + "/neighbors_" + str(
예제 #5
0
# verifying
sum_relative_error_model = 0
for sample_selected in range(0, samples_quantity):
    expected_output = expected_outputs_wp[sample_selected][position_output]

    real_output_SVR = model_SVR.predict(input_rf[sample_selected].reshape(1, -1))
    #real_output_KRR = model_KRR.predict(input_wp[sample_selected].reshape(1, -1))
    real_output_RegressionTree = model_RegressionTree.predict(input_rf[sample_selected].reshape(1, -1))
    real_output_RandomForest = model_RandomForest.predict(input_wpaw[sample_selected].reshape(1, -1))
    real_output_GBRT = model_GBRT.predict(input_wpwl[sample_selected].reshape(1, -1))
    real_output_BaggingRegressor = model_BaggingRegressor.predict(input_wpaw[sample_selected].reshape(1, -1))
    real_output_ExtraTreeRegressor = model_ExtraTreeRegressor.predict(input_wp[sample_selected].reshape(1, -1))
    real_output_AdaBoostRegressor = model_AdaBoostRegressor.predict(input_rf[sample_selected].reshape(1, -1))

    if detailed_verbose != 0:
        Support.colored_print("-------------------------------------------", "blue")
        Support.colored_print("expected: " + str(expected_output), "green")
        Support.colored_print("model SVR: " + str(real_output_SVR), "green")
        #Support.colored_print("model KRR: " + str(real_output_KRR), "green")
        Support.colored_print("model RegressionTree: " + str(real_output_RegressionTree), "green")
        Support.colored_print("model RandomForest: " + str(real_output_RandomForest), "green")
        Support.colored_print("model GBRT: " + str(real_output_GBRT), "green")
        Support.colored_print("model BaggingRegressor: " + str(real_output_BaggingRegressor), "green")
        Support.colored_print("model ExtraTreeRegressor: " + str(real_output_ExtraTreeRegressor), "green")
        Support.colored_print("model AdaBoostRegressor: " + str(real_output_AdaBoostRegressor), "green")

    errors = [0, 0, 0, 0, 0, 0, 0]

    # relative_error_model_KRR = abs((real_output_KRR - expected_output) / real_output_KRR)

    if real_output_SVR != 0:
예제 #6
0
            input_wpaw[sample_selected].reshape(1, -1))
    elif selected_output == Model.EXTRA_TREE_REGRESSOR:
        real_output = model_ExtraTreeRegressor.predict(
            input_wp[sample_selected].reshape(1, -1))
    elif selected_output == Model.GRADIENT_BOOSTING_REGRESSOR:
        real_output = model_GBRT.predict(input_wpwl[sample_selected].reshape(
            1, -1))
    elif selected_output == Model.BAGGING_REGRESSOR:
        real_output = model_BaggingRegressor.predict(
            input_wpaw[sample_selected].reshape(1, -1))
    elif selected_output == Model.ADABOOST_REGRESSOR:
        real_output = model_AdaBoostRegressor.predict(
            input_rf[sample_selected].reshape(1, -1))

    if detailed_verbose != 0:
        Support.colored_print("-------------------------------------------",
                              "blue")
        Support.colored_print("expected: " + str(expected_output), "green")
        Support.colored_print("model selected: " + str(selected_output),
                              "green")
        Support.colored_print("model output: " + str(real_output), "green")

    if real_output == 0:
        real_output = 0.0001
    relative_error = abs((real_output - expected_output) / (real_output))
    sum_relative_error_model += relative_error

# showing statistics
Support.colored_print("Statistics:", "pink")
Support.colored_print("Samples quantity: " + str(samples_quantity), "pink")
Support.colored_print(
    "Percentage quality (relative error) model: " +
# For every sample, compare two estimates of output 3 with its expected
# value: the model's prediction and a value derived from the input columns.
for sample_selected in range(0, samples_quantity):
    # Input columns 3 to 6 added together, minus column 8.
    production = input[sample_selected][3] + \
                 input[sample_selected][4] + \
                 input[sample_selected][5] + \
                 input[sample_selected][6] - \
                 input[sample_selected][8]
    # Add every expected output except index 3, the one being estimated.
    for output_selected in range(0, output_quantity):
        if output_selected != 3:
            production += expected_outputs[sample_selected][output_selected]

    expected_output = expected_outputs[sample_selected][3]
    # The sample is reshaped to a single row before being passed to predict.
    real_output = model.predict(input[sample_selected].reshape(1, -1))
    # Second estimate: input column 7 minus the production total above.
    retrieved_output = input[sample_selected][7] - production

    # Per-sample debug output, currently disabled.
    # Support.colored_print("-------------------------------------------", "blue")
    # Support.colored_print("expected: " + str(expected_output), "green")
    # Support.colored_print("model: " + str(real_output), "green")
    # Support.colored_print("retrieved: " + str(retrieved_output), "green")

    # Both errors are divided by the estimate, not by the expected value.
    # NOTE(review): there is no guard for an estimate equal to 0 here -
    # confirm whether one is needed.
    relative_error_model = abs((real_output - expected_output) / real_output)
    relative_error_retrieved = abs((retrieved_output - expected_output) / retrieved_output)
    sum_relative_error_model += relative_error_model
    sum_relative_error_retrieved += relative_error_retrieved

# showing statistics
# The printed figures are the summed relative errors divided by the number of
# samples; they are not multiplied by 100 despite the "Percentage" label.
Support.colored_print("Statistics:", "pink")
Support.colored_print("Samples quantity: " + str(samples_quantity), "pink")
Support.colored_print("Percentage quality (relative error) model: " + str(sum_relative_error_model/samples_quantity), "pink")
Support.colored_print("Percentage quality (relative error) retrieved: " + str(sum_relative_error_retrieved/samples_quantity), "pink")

Support.colored_print("Done!", "red")
    expected_output_3 = expected_outputs_wpaw[sample_selected][3]
    expected_output_4 = expected_outputs_wp[sample_selected][4]
    #expected_output_5 = expected_outputs_rf[sample_selected][5]

    real_output_production_1 = model_production_1.predict(
        input_wp[sample_selected].reshape(1, -1))
    real_output_production_2 = model_production_2.predict(
        input_wpaw[sample_selected].reshape(1, -1))
    real_output_production_3 = model_production_3.predict(
        input_wp[sample_selected].reshape(1, -1))
    real_output_production_4 = model_production_4.predict(
        input_wp[sample_selected].reshape(1, -1))
    #real_output_production_5 = model_production_5.predict(input_rf[sample_selected].reshape(1, -1))

    if detailed_verbose != 0:
        Support.colored_print("-------------------------------------------",
                              "blue")
        Support.colored_print(
            "model output 1: " + str(real_output_production_1) +
            " expected: " + str(expected_output_1), "green")
        Support.colored_print(
            "model output 2: " + str(real_output_production_2) +
            " expected: " + str(expected_output_2), "green")
        Support.colored_print(
            "model output 3: " + str(real_output_production_3) +
            " expected: " + str(expected_output_3), "green")
        Support.colored_print(
            "model output 4: " + str(real_output_production_4) +
            " expected: " + str(expected_output_4), "green")
        #Support.colored_print("model output 5: " + str(real_output_production_5) + " expected: " + str(expected_output_5), "green")

    relative_error_production_1 = Support.calculate_relative_error(
    root_directory = "/Users/francesco/Desktop/on_error_2nd/"

    for dir in os.listdir(root_directory):
        if not dir[0] == '.':
            directory_nation = root_directory + dir
            directory_nation_train = directory_nation + "/train/"
            directory_nation_test = directory_nation + "/test/"

            files = os.listdir(directory_nation_train)
            files.sort()
            for file in files:
                if not file[0] == '.':
                    path_training_set_prediction = directory_nation_train + file
                    path_test_set_prediction = directory_nation_test + file.replace(
                        "train", "test")
                    Support.colored_print("______________", "red")
                    Support.colored_print(
                        "Current file: " + path_training_set_prediction,
                        "yellow")
                    Support.colored_print(
                        "Current file: " + path_test_set_prediction, "yellow")

                    training_set_input, training_set_output, _, _ = Parser.parse_data(
                        path_training_set_prediction, 0)
                    test_set_input, test_set_output, _, _ = Parser.parse_data(
                        path_test_set_prediction, 0)

                    best_k = 0
                    best_k_weighted = 0
                    avg_accuracy_best_k = float("inf")
                    avg_accuracy_best_k_weighted = float("inf")
예제 #10
0
                                 normalize=False,
                                 positive=False,
                                 precompute='auto',
                                 random_state=0,
                                 selection='cyclic',
                                 tol=0.0001,
                                 verbose=0)
            model_name = "ELASTIC_NET_CV"
        elif selected_model == Model.PLS_REGRESSION:
            model = PLSRegression(n_components=2)
            model_name = "PLS_REGRESSION"
        elif selected_model == Model.LASSO_CV:
            model = LassoCV()
            model_name = "LASSO_CV"
        else:
            Support.colored_print("No method selected!", "red")
            sys.exit(0)
        Support.colored_print("Training " + model_name + "...", "yellow")

        t0 = time.time()
        model.fit(X[:train_size], y[:train_size])
        model_fit = time.time() - t0
        t0 = time.time()
        y_model = model.predict(X_plot)
        model_predict = time.time() - t0

        sum_relative_error_real = 0
        sum_relative_error_plus = 0
        sum_relative_error_minus = 0
        samples_quantity, _ = input_for_test.shape
        for sample_selected in range(0, samples_quantity):
예제 #11
0
            "277.000000 0.000000 5.000000 0.000000 1753.000000 398.000000 2855.000000 27313.000000 -5612.000000 83.170000 28.951000 20.790000 212.000000 6799.000000 3494.000000 78.000000 39.000000 6010.000000"
            .split(' ')
        ]
        given_output = 232.0
        given_error = 5.989121
        path_model = "/Users/francesco/Desktop/Cose da Sistemare/best_predictors/all/fossil_coal.joblib"
        path_samples = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/training_sets/training_set_fossil_coal_error.txt"
    else:
        k = int(sys.argv[1])
        given_input = [float(x) for x in sys.argv[2].split(' ')]
        given_output = float(sys.argv[3])
        given_error = float(sys.argv[4])
        path_model = sys.argv[5]
        path_samples = sys.argv[6]

    model = joblib.load(path_model)
    given_samples, given_errors, _, _ = Parser.parse_data(path_samples, 0)

    prediction = model.predict((numpy.asarray(given_input)).reshape(1, -1))

    Support.colored_print("model output: " + str(prediction), "blue")
    Support.colored_print("real output: " + str(given_output), "blue")

    errors = knn.find_k_neighbors(given_input, given_samples, given_errors, k)
    error = knn.calculate_error(errors)

    Support.colored_print("distance based error: " + str(error), "red")
    Support.colored_print("real error: " + str(given_error), "red")

    Support.colored_print("Completed!", "pink")
예제 #12
0
    path_training_set_error = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/training_sets/training_set_fossil_oil_error.txt"
    path_test_set_prediction = "/Users/francesco/Desktop/Cose da Sistemare/datas/ts/test_set_wpwl.txt"
    path_test_set_error = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/test_sets/test_set_fossil_oil_error.txt"

    model = joblib.load(path_model)

    training_set_error_input, training_set_error_output, _, _ = Parser.parse_data(path_training_set_error, 0)
    test_set_prediction_input, test_set_prediction_output, _, _ = Parser.parse_data(path_test_set_prediction, 0)
    _, test_set_error_output, _, _ = Parser.parse_data(path_test_set_error, 0)

    best_k = 0
    avg_accuracy_best_k = float("inf")
    all_avg_values = []

    for current_k in range(51, (k_to_try + 1)):
        Support.colored_print("Current k: " + str(current_k), "blue")
        sum_errors = 0
        for i in range(0, len(test_set_prediction_input)):
            current_input = test_set_prediction_input[i]
            prediction = model.predict((numpy.asarray(current_input)).reshape(1, -1))
            if verbose:
                Support.colored_print("Model output: " + str(prediction), "blue")
                Support.colored_print("Real output: " + str(test_set_prediction_output[i][index_output_prediction]), "blue")

            error = knn.get_error_estimation(current_input, training_set_error_input, training_set_error_output, current_k, weighted)
            if verbose:
                Support.colored_print("Knn based error: " + str(error), "red")
                Support.colored_print("Real error: " + str(test_set_error_output[i][0]), "red")
                Support.colored_print("Absolute error knn estimation: " + str(abs(error - test_set_error_output[i][0])), "green")

            sum_errors += abs(error - test_set_error_output[i][0])
예제 #13
0
def train(path_training_set,
          path_target_set,
          path_output,
          epochs,
          batch_size,
          load,
          output_selected=-1):
    """Train a neural network on parsed data and save it to ``path_output``.

    Args:
        path_training_set: File parsed with ``Parser.parse_data`` for the
            training inputs/outputs and the network dimensions.
        path_target_set: File parsed the same way for the test data.
        path_output: Where the network is saved, and loaded from when
            ``load`` equals 1.
        epochs: Forwarded to the network's ``train`` call.
        batch_size: Forwarded to the network's ``train`` call.
        load: ``1`` loads an existing network from ``path_output``; any
            other value creates a new one.
        output_selected: Index of the single output column to train on, or
            ``-1`` to keep every output column.

    Progress is reported in green through ``Support.colored_print``.
    """
    single_output = output_selected != -1

    # Training data; the input/output sizes come from this file only.
    Support.colored_print("Loading training set...", "green")
    parsed_training = Parser.parse_data(path_training_set)
    training_input, training_output, input_size, output_size = parsed_training
    if single_output:
        training_output = training_output[:, output_selected]
        output_size = 1

    # Test data; the sizes reported for this file are not used.
    Support.colored_print("Loading test set...", "green")
    test_input, test_output, _, _ = Parser.parse_data(path_target_set)
    if single_output:
        test_output = test_output[:, output_selected]

    # Either resume from the saved network or start a new one.
    Support.colored_print("Building neural network...", "green")
    neural_network = NeuralNetwork.NeuralNetwork()
    if load == 1:
        neural_network.load(path_output)
    else:
        neural_network.create(input_size, output_size)

    # saving_path is passed only when a single output column is selected.
    Support.colored_print("Training...", "green")
    train_options = {"epochs": epochs, "batch_size": batch_size, "verbose": 1}
    if single_output:
        train_options["saving_path"] = path_output
    neural_network.train(training_input, training_output, test_input,
                         test_output, **train_options)

    Support.colored_print("Saving...", "green")
    neural_network.save(path_output)
    Support.colored_print("Finished!", "green")
예제 #14
0
#path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_LASSO_CV/model_"
#path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_PLS_REGRESSION/model_"
#path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_REGRESSION_TREE/model_"
#path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_SVR/model_"
path_predictors = "/Users/francesco/Desktop/out_error/out_error_5/out_GPML/model_"
path_test_set = "/Users/francesco/Desktop/Cose da Sistemare/datas/error/test_sets/test_set_other_error.txt"

input, expected_outputs, input_size, output_size = Parser.parse_data(
    path_test_set)
samples_quantity, _ = input.shape
output_quantity = len(expected_outputs[0])

# verifying
for output_selected in range(0, output_quantity):
    model = joblib.load(path_predictors + str(output_selected) + ".joblib")
    Support.colored_print("Verifying output n: " + str(output_selected),
                          "blue")
    sum_relative_error = 0
    sum_absolute_error = 0
    for sample_selected in range(0, samples_quantity):
        expected_output = expected_outputs[sample_selected][output_selected]
        real_output = model.predict(input[sample_selected].reshape(1, -1))
        real_output *= 100
        if real_output == 0:
            real_output = 0.0001
        relative_error = abs((real_output - expected_output) / real_output)
        absolute_error = abs(real_output - expected_output)
        sum_relative_error += relative_error
        sum_absolute_error += absolute_error
        # showing result
        if verbose == 1:
            Support.colored_print("Sample n: " + str(sample_selected), "green")
예제 #15
0
    REGRESSION_TREE = 3                 # done
    RANDOM_FOREST = 4                   # in progress
    EXTRA_TREE_REGRESSOR = 5            # scheduled
    GRADIENT_BOOSTING_REGRESSOR = 6     # scheduled
    BAGGING_REGRESSOR = 7               # scheduled
    ADABOOST_REGRESSOR = 8              # scheduled


selected_model = Model.ADABOOST_REGRESSOR
path_training_set = "/Users/francesco/Desktop/disp/rf/test_set.txt"
base_path_saving = "/Users/francesco/Desktop"
output_quantity = 6

for output_selected in range(0, output_quantity):
    # Loading sample data
    Support.colored_print("Loading training set...", "green")
    X, y, input_size, output_size = Parser.parse_data(path_training_set)
    train_size = X.size
    y = y[:, output_selected]
    X_plot = numpy.zeros((1, input_size))
    X_plot[0][0] = X.item(0)

    # Fit regression model
    Support.colored_print("Initializing model...", "green")
    if selected_model == Model.SVR:
        c_param = [0.001, 0.01, 0.1, 1, 10]
        gamma_param = [0.001, 0.01, 0.1, 1]
        model = GridSearchCV(SVR(kernel='rbf'), cv=5, param_grid={"C": c_param, "gamma": gamma_param})
        model_name = "SVR"
    elif selected_model == Model.KRR:
        model = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1), cv=5, param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3], "gamma": numpy.logspace(-2, 2, 5)})