Ejemplo n.º 1
0
def plot_graph(start_percentage, end_percentage, step_size, data, phi, iters,
               delta):
    """Collect the points of an error-vs-training-size curve.

    For every percentage ``p`` from ``start_percentage`` up to and including
    ``end_percentage`` (advancing by ``step_size``), a model is fitted on the
    first ``p`` percent of ``data`` and its error is measured against the
    labels of the *whole* data set.

    Args:
        start_percentage: First training-set size, as a percentage of
            ``len(data)``.
        end_percentage: Last training-set size (inclusive), as a percentage.
        step_size: Increment between consecutive percentages; must be
            positive whenever the sweep is non-empty.
        data: Sliceable data set handed to ``helpers.get_X_Y`` and to
            ``linear_regression.linear_regression``.
        phi: Feature map applied to the raw inputs.
        iters: Forwarded unchanged to ``linear_regression.linear_regression``.
        delta: Forwarded unchanged to ``linear_regression.linear_regression``
            and ``helpers.MSE``.

    Returns:
        A ``(plot_data_x, plot_data_y)`` tuple of equal-length lists: the
        percentages that were evaluated and the corresponding
        ``helpers.MSE`` values.

    Raises:
        ValueError: If ``step_size`` is not positive while
            ``start_percentage <= end_percentage``; the sweep could never
            reach its end and would loop forever.
    """
    # A non-positive step can never push the cursor past end_percentage, so
    # fail fast instead of looping (and growing the result lists) forever.
    if step_size <= 0 and start_percentage <= end_percentage:
        raise ValueError(
            "step_size must be positive, got {!r}".format(step_size))

    raw_X, y = helpers.get_X_Y(data)
    X = phi(raw_X)
    plot_data_x = []
    plot_data_y = []

    # Use a local cursor rather than reassigning the parameter.
    percentage = start_percentage
    while percentage <= end_percentage:
        plot_data_x.append(percentage)
        # Number of leading samples that make up `percentage` % of the data.
        ratio = int(len(data) * percentage / 100.0)
        theta = linear_regression.linear_regression(data[:ratio], phi, iters,
                                                    delta)
        # Error is evaluated on the full data set, not only the training
        # prefix. NOTE(review): `X * theta` presumably is a matrix product
        # (numpy matrix operands) -- confirm against phi's return type.
        plot_data_y.append(helpers.MSE(y, X * theta, theta, delta))
        percentage += step_size
    return plot_data_x, plot_data_y
Ejemplo n.º 2
0
def k_fold_cross_validation(data, phi, k, iters, delta):
    """Estimate model error with k-fold cross-validation.

    The data set is cut into ``k`` consecutive folds of ``len(X) // k``
    samples each. Every fold is used once as the validation set while the
    model is fitted on all remaining samples. When the number of samples is
    not divisible by ``k``, the trailing remainder is never validated on but
    is always part of the training data.

    Args:
        data: Data set handed to ``helpers.get_X_Y``; must support slicing
            and ``extend`` on the slices (e.g. a ``list``).
        phi: Feature map applied to the raw inputs.
        k: Number of folds; an integer with ``1 <= k <= number of samples``.
        iters: Forwarded unchanged to ``linear_regression.linear_regression``.
        delta: Forwarded unchanged to ``linear_regression.linear_regression``
            and ``helpers.MSE``.

    Returns:
        A two-element list ``[mean, variance]`` of the per-fold
        ``helpers.MSE`` values.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples (which would leave every validation fold empty).
    """
    if k < 1:
        raise ValueError("k must be at least 1, got {!r}".format(k))

    raw_X, y = helpers.get_X_Y(data)
    X = phi(raw_X)

    # Each fold holds 1/k of the samples. The previous formula used k percent
    # of the samples, which only coincides with a k-fold split when k == 10.
    fold_size = len(X) // k
    if fold_size == 0:
        raise ValueError(
            "k ({!r}) cannot exceed the number of samples ({})".format(
                k, len(X)))

    errors = []
    for start in range(0, k * fold_size, fold_size):
        stop = start + fold_size

        # Training data is everything outside [start, stop). The slice is a
        # fresh object (for a list), so extending it leaves `data` untouched.
        training_data = data[:start]
        training_data.extend(data[stop:])

        validation_data = X[start:stop]
        validation_labels = y[start:stop]

        theta = linear_regression.linear_regression(training_data, phi, iters,
                                                    delta)
        errors.append(
            helpers.MSE(validation_labels, validation_data * theta, theta,
                        delta))
    return [np.mean(errors), np.var(errors)]
                # 'label_title':label_title,
                # 'label_url':label_url,
                # 'label_source':label_source
            }
            dataframe_out = pandas.DataFrame(data, index=timestamp)
            dataframe_out.index.name = "timestamp"
            dataframe_out = dataframe_out[[
                'value', 'prediction_training', 'prediction', 'label'
                # 'label_title',
                # 'label_url',
                # 'label_source'
            ]]
            out_file = helpers.get_result_file_name(f, output_directory, m)
            out_file_name = out_file[:-4] + ".metric-" + metric + ".csv"
            dataframe_out.to_csv(out_file_name)
            new_jsonf_name = out_file_name[:-3] + "json"
            shutil.copyfile(jsonf_name, new_jsonf_name)

            testing_value = value[testing_start:]
            mse = helpers.MSE(testing_value, testing_prediction)
            output_files.append(
                helpers.get_result_dump_name(out_file_name) + "," + str(mse) +
                "," + params)

            helpers.dump_results(output_files, output_directory, m)
        print("##### [" + m + "]" + str(count) +
              " CSV input File processed #####")
        count += 1

    print("##### " + m + " done ! #####")
print("All Done !")